--------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive/Resul
> ts/log/inaccurate_discrimination.log
  log type:  text
 opened on:  12 Jul 2023, 15:46:29

. 
. ***************************************************************************************************
. *  1. Insheet and Clean Qualtrics Survey Output
. ***************************************************************************************************
. 
. ***********************************************
. * Worker Task (Trivia - Survey 1) survey output
. ***********************************************
. 
. * Insheet, rename, and label the Qualtrics output fields
. insheet using "${raw}Qualtrics-Part1.csv", comma name clear
(59 vars, 604 obs)

. rename responseid id_worker

. rename durationinseconds duration_worker

. rename q53 color

. rename q54 movie

. rename q55 coffeetea

. rename q56 age_worker

. rename q57 gender_worker

. rename q58 subject

. rename q59 sport

. label var duration_worker "Survey 1 Duration (Seconds)"

. label var id_worker "ID of Worker"

. label var color "Favorite Color"

. label var movie "Favorite Movie"

. label var subject "Favorite Subject"

. label var sport "Favorite Sport"

. label var coffeetea "Prefer Coffee or Tea"

. label var age_worker "Age (Worker)"

. label var gender_worker "Gender (Worker)"

. 
. * The correct answer happens to be the modal answer in all cases, so can flag it and assign score
. * based on this variable. 
. forval i=2/51{
  2.     bysort q`i': gen answerfrequency_q`i'=_N 
  3.     egen maxanswer_q`i'=max(answerfrequency_q`i')
  4.     gen q`i'_correct=(maxanswer_q`i'==answerfrequency_q`i')
  5.     drop maxanswer_q`i' answerfrequency_q`i'
  6. }

. egen score=rowtotal(q2_correct - q51_correct)

. sum score, detail

                            score
-------------------------------------------------------------
      Percentiles      Smallest
 1%           13              9
 5%           19              9
10%           23             11       Obs                 604
25%           32             12       Sum of wgt.         604

50%           39                      Mean           36.68709
                        Largest       Std. dev.      8.967221
75%           44             50
90%           47             50       Variance       80.41104
95%           48             50       Skewness      -.8012934
99%           49             50       Kurtosis        2.97207

. drop q2-q51 q2_correct-q51_correct

. label var score "Trivia score"

. 
. * Save the dataset and then insheet & merge with the MTurk payments 
. * The merge flags 5 people who didn't successfully submit the survey (dropped below); 599 match
. * NOTE: MTurk Worker IDs (PII) have been manually removed from the MTurk payments files
. save "${clean}cleaned_worker.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/cleaned_worker.dta saved

. 
. insheet using "${raw}MTurk-US-Part1.csv", comma name clear
(3 vars, 401 obs)

. rename answersurveycode id_worker

. gen country_worker="United States"

. keep if assignmentstatus=="Approved"
(2 observations deleted)

. keep id_worker country_worker

. save "${clean}mturk_worker_US.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/mturk_worker_US.dta saved

. 
. insheet using "${raw}MTurk-India-Part1.csv", comma name clear 
(27 vars, 200 obs)

. rename answersurveycode id_worker

. gen country_worker="India"

. keep if assignmentstatus=="Approved"
(0 observations deleted)

. keep id_worker country_worker

. save "${clean}mturk_worker_India.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/mturk_worker_India.dta saved

. 
. append using "${clean}mturk_worker_US.dta"
(variable country_worker was str5, now str13 to accommodate using data's values)

. label var country_worker "Country (Worker)"

. sort id_worker

. save "${clean}mturk_worker.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/mturk_worker.dta saved

. 
. use "${clean}cleaned_worker.dta", clear

. merge 1:1 id_worker using "${clean}mturk_worker.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                             5
        from master                         5  (_merge==1)
        from using                          0  (_merge==2)

    Matched                               599  (_merge==3)
    -----------------------------------------

. 
. * Drop bottom 1% (<229 seconds) and top 1% (>3274 seconds) of data based on duration 
. * Drops 12 submissions (604 -> 592); 589 remain after the 3 unmatched surveys are dropped below
. * This is to match the R code written by our RA (i.e. to construct the data shown to employers)
. sum duration_worker, detail

                 Survey 1 Duration (Seconds)
-------------------------------------------------------------
      Percentiles      Smallest
 1%          229             43
 5%          368             92
10%          488            171       Obs                 604
25%          693            214       Sum of wgt.         604

50%          975                      Mean           1144.949
                        Largest       Std. dev.      670.2828
75%       1404.5           3337
90%         2141           3526       Variance       449279.1
95%         2566           3700       Skewness       1.341875
99%         3274           4047       Kurtosis       4.756577

. drop if duration_worker<r(p1) | duration_worker>r(p99)
(12 observations deleted)

. disp _N
592

. 
. * Drop remaining 3 surveys that can't be matched to MTurk records
. tab _merge

   Matching result from |
                  merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
        Master only (1) |          3        0.51        0.51
            Matched (3) |        589       99.49      100.00
------------------------+-----------------------------------
                  Total |        592      100.00

. disp _N
592

. drop if _merge!=3
(3 observations deleted)

. disp _N
589

. drop _merge

. 
. * Construct an "id_profile" to link the "recorder" field below (the identifier to track which
. * profile the Javascript showed to an Employer). This ID is just a running count of the workers
. * after sorting by their Qualtrics-assigned response ID.
. sort id_worker

. gen id_profile=_n

. save "${clean}cleaned_worker.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/cleaned_worker.dta saved

. 
. *********************************************************
. * Employer Task (Hiring - Survey 2)
. *********************************************************
. 
. * Insheet, rename, and label the Qualtrics output fields
. * NOTE: MTurk Worker IDs (PII) have been manually removed from the survey output
. insheet using "${raw}Qualtrics-Part2.csv", comma name clear
(51 vars, 592 obs)

. rename responseid id_employer

. rename duration duration_employer

. rename q17 gender_employer

. rename q18 age_employer

. rename q21 education_employer

. label var duration_employer "Survey 2 Duration (Seconds)"

. label var id_employer "ID of Employer"

. label var duration_employer "Survey Duration (Seconds)"

. label var education_employer "Education Level (Employer)"

. label var age_employer "Age (Employer)"

. label var gender_employer "Gender (Employer)"

. 
. * Construct prediction measures (merge to one set of variables)
. gen pred_female=q24_1 if q24_1!=.
(296 missing values generated)

. replace pred_female=q42_1 if q24_1==.
(296 real changes made)

. 
. gen pred_male=q24_14 if q24_14!=.
(296 missing values generated)

. replace pred_male=q42_14 if q24_14==.
(296 real changes made)

. 
. gen pred_us=q24_20 if q24_20!=.
(296 missing values generated)

. replace pred_us=q42_20 if q24_20==.
(296 real changes made)

. 
. gen pred_india=q24_3 if q24_3!=.
(296 missing values generated)

. replace pred_india=q42_3 if q24_3==.
(296 real changes made)

. 
. gen pred_young=q24_4 if q24_4!=.
(296 missing values generated)

. replace pred_young=q42_4 if q24_4==.
(296 real changes made)

. 
. gen pred_old=q24_5 if q24_5!=.
(296 missing values generated)

. replace pred_old=q42_5 if q24_5==.
(296 real changes made)

. 
. * Create variable for incentive condition
. gen incentivized=1 if q42_1!=.
(296 missing values generated)

. replace incentivized=0 if q24_1!=.
(296 real changes made)

. drop q24* q42* prediction_do

. 
. * Save the dataset and then insheet & merge with the MTurk payments 
. * Drops the 6 people who didn't successfully submit the survey, leaving 587 employers
. save "${clean}cleaned_employer.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/cleaned_employer.dta saved

. 
. insheet using "${raw}MTurk-US-Part2.csv", comma name clear
(3 vars, 400 obs)

. rename answersurveycode id_employer

. gen country_employer="United States"

. keep if assignmentstatus=="Approved"
(2 observations deleted)

. keep id_employer country_employer assignmentstatus

. save "${clean}mturk_employer_US.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/mturk_employer_US.dta saved

. 
. insheet using "${raw}MTurk-India-Part2.csv", comma name clear 
(3 vars, 195 obs)

. rename answersurveycode id_employer

. gen country_employer="India"

. keep if assignmentstatus=="Approved"
(6 observations deleted)

. keep id_employer country_employer assignmentstatus

. save "${clean}mturk_employer_India.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/mturk_employer_India.dta saved

. 
. append using "${clean}mturk_employer_US.dta"
(variable id_employer was str17, now str88 to accommodate using data's values)
(variable country_employer was str5, now str13 to accommodate using data's values)

. label var country_employer "Country (Employer)"

. sort id_employer

. save "${clean}mturk_employer.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/mturk_employer.dta saved

. 
. use "${clean}cleaned_employer.dta", clear

. merge 1:1 id_employer using "${clean}mturk_employer.dta"
(variable id_employer was str17, now str88 to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             7
        from master                         6  (_merge==1)
        from using                          1  (_merge==2)

    Matched                               586  (_merge==3)
    -----------------------------------------

. tab _merge

   Matching result from |
                  merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
        Master only (1) |          6        1.01        1.01
         Using only (2) |          1        0.17        1.18
            Matched (3) |        586       98.82      100.00
------------------------+-----------------------------------
                  Total |        593      100.00

. disp _N
593

. 
. * Assign country ID for one employer who separately emailed their code to the RA
. replace _merge=3 if id_employer=="R_sze0cr7UEpU38Rz"
(1 real change made)

. replace country_employer="United States" if id_employer=="R_sze0cr7UEpU38Rz"
(1 real change made)

. drop if _merge!=3
(6 observations deleted)

. disp _N
587

. drop _merge

. 
. * Follow RA's rule of dropping the 7 employers who took under 5 minutes. Leaves 580 employers.
. disp _N
587

. drop if duration_employer<300
(7 observations deleted)

. disp _N
580

. 
. * Rename the wage variables so that we can reshape later and keep association with profiles
. rename _r wage1

. local i=6

. forval j=2/20 {
  2.     local i=`i'+1 
  3.     rename v`i' wage`j'
  4. }

. rename _q28 wage21

. local i=38

. forval j=22/30 {
  2.     local i=`i'+1
  3.     rename v`i' wage`j'
  4. }

. 
. * Recorder is the field where the profile associated with each wage is stored
. * Split this up so we can then reshape.
. split recorder, p("|")
variables created as string: 
recorder1   recorder5   recorder9   recorder13  recorder17  recorder21  recorder25  recorder29
recorder2   recorder6   recorder10  recorder14  recorder18  recorder22  recorder26  recorder30
recorder3   recorder7   recorder11  recorder15  recorder19  recorder23  recorder27  recorder31
recorder4   recorder8   recorder12  recorder16  recorder20  recorder24  recorder28  recorder32

. * There are 3 cases where it appears the Employer hit next without assigning a score. These are 
. * cases where the order field is doubled (e.g. for R_331MPOZhr0c1Zbt there are two profiles 
. * associated with order "5"). Since it's not clear which one was observed by the employer, we drop
. * these cases. They are employers: R_331MPOZhr0c1Zbt, R_eer8ZzjQ00TZevv, R_uvKzkkeylN7pDkB
. list id_employer recorder* if recorder32!=""

     +-------------------------------------------------------------------------------------------------+
309. |                                              id_employer                                        |
     |                                        R_331MPOZhr0c1Zbt                                        |
     |-------------------------------------------------------------------------------------------------|
     | recorder                                                                                        |
     | 520-0 | 123-1 | 525-2 | 466-3 | 147-4 | 225-5 | 442-5 | 231-6 | 44-7 | 486-8 | 102-9 | 528-10.. |
     |-------------------------------------------------------------------------------------------------|
     | recor~r1  | recor~r2  | recor~r3  | recor~r4  | recor~r5  | recor~r6  |  recor~r7  |  recor~r8  |
     |   520-0   |   123-1   |   525-2   |   466-3   |   147-4   |   225-5   |    442-5   |    231-6   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~r9  | recor~10  | recor~11  | recor~12  | recor~13  | recor~14  |  recor~15  |  recor~16  |
     |    44-7   |   486-8   |   102-9   |  528-10   |  309-11   |   89-12   |   388-13   |   472-14   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~17  | recor~18  | recor~19  | recor~20  | recor~21  | recor~22  |  recor~23  |  recor~24  |
     |   21-15   |    2-16   |  276-17   |  488-18   |  571-19   |   586-0   |    329-1   |    113-2   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~25  | recor~26  | recor~27  | recor~28  | recor~29  | recor~30  |  recor~31  |  recor~32  |
     |   581-3   |   299-4   |   387-5   |   563-6   |   523-7   |   384-8   |    303-9   |       460  |
     +-------------------------------------------------------------------------------------------------+

     +-------------------------------------------------------------------------------------------------+
534. |                                              id_employer                                        |
     |                                        R_eer8ZzjQ00TZevv                                        |
     |-------------------------------------------------------------------------------------------------|
     | recorder                                                                                        |
     | 355-0 | 453-1 | 120-2 | 442-3 | 31-4 | 394-5 | 536-6 | 451-7 | 536-8 | 407-9 | 269-10 | 260-1.. |
     |-------------------------------------------------------------------------------------------------|
     | recor~r1  | recor~r2  | recor~r3  | recor~r4  | recor~r5  | recor~r6  |  recor~r7  |  recor~r8  |
     |   355-0   |   453-1   |   120-2   |   442-3   |    31-4   |   394-5   |    536-6   |    451-7   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~r9  | recor~10  | recor~11  | recor~12  | recor~13  | recor~14  |  recor~15  |  recor~16  |
     |   536-8   |   407-9   |  269-10   |  260-10   |  518-11   |  174-12   |   162-13   |   583-14   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~17  | recor~18  | recor~19  | recor~20  | recor~21  | recor~22  |  recor~23  |  recor~24  |
     |  302-15   |  482-16   |  576-17   |   33-18   |  455-19   |   410-0   |    425-1   |    114-2   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~25  | recor~26  | recor~27  | recor~28  | recor~29  | recor~30  |  recor~31  |  recor~32  |
     |   251-3   |   169-4   |    36-5   |   157-6   |   220-7   |    12-8   |    320-9   |       296  |
     +-------------------------------------------------------------------------------------------------+

     +-------------------------------------------------------------------------------------------------+
559. |                                              id_employer                                        |
     |                                        R_uvKzkkeylN7pDkB                                        |
     |-------------------------------------------------------------------------------------------------|
     | recorder                                                                                        |
     | 9-0 | 378-1 | 456-2 | 130-3 | 409-4 | 94-5 | 25-6 | 355-7 | 152-8 | 435-9 | 403-10 | 520-10 |.. |
     |-------------------------------------------------------------------------------------------------|
     | recor~r1  | recor~r2  | recor~r3  | recor~r4  | recor~r5  | recor~r6  |  recor~r7  |  recor~r8  |
     |     9-0   |   378-1   |   456-2   |   130-3   |   409-4   |    94-5   |     25-6   |    355-7   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~r9  | recor~10  | recor~11  | recor~12  | recor~13  | recor~14  |  recor~15  |  recor~16  |
     |   152-8   |   435-9   |  403-10   |  520-10   |  259-11   |  389-12   |   225-13   |   259-14   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~17  | recor~18  | recor~19  | recor~20  | recor~21  | recor~22  |  recor~23  |  recor~24  |
     |   82-15   |  164-16   |   91-17   |  502-18   |  534-19   |   556-0   |    327-1   |    420-2   |
     |-----------+-----------+-----------+-----------+-----------+-----------+------------+------------|
     | recor~25  | recor~26  | recor~27  | recor~28  | recor~29  | recor~30  |  recor~31  |  recor~32  |
     |   251-3   |   526-4   |   538-5   |   246-6   |    95-7   |   202-8   |    169-9   |       136  |
     +-------------------------------------------------------------------------------------------------+

. drop if recorder32!=""
(3 observations deleted)

. drop recorder32 recorder display

. drop recorder31

. forvalues i=1/30 {
  2.     split recorder`i', p("-") gen(recorder`i'_)
  3.     drop recorder`i'_2 recorder`i'
  4.     rename recorder`i'_1 recorder`i'
  5. }
variables created as string: 
recorder1_1  recorder1_2
variables created as string: 
recorder2_1  recorder2_2
variables created as string: 
recorder3_1  recorder3_2
variables created as string: 
recorder4_1  recorder4_2
variables created as string: 
recorder5_1  recorder5_2
variables created as string: 
recorder6_1  recorder6_2
variables created as string: 
recorder7_1  recorder7_2
variables created as string: 
recorder8_1  recorder8_2
variables created as string: 
recorder9_1  recorder9_2
variables created as string: 
recorder10_1  recorder10_2
variables created as string: 
recorder11_1  recorder11_2
variables created as string: 
recorder12_1  recorder12_2
variables created as string: 
recorder13_1  recorder13_2
variables created as string: 
recorder14_1  recorder14_2
variables created as string: 
recorder15_1  recorder15_2
variables created as string: 
recorder16_1  recorder16_2
variables created as string: 
recorder17_1  recorder17_2
variables created as string: 
recorder18_1  recorder18_2
variables created as string: 
recorder19_1  recorder19_2
variables created as string: 
recorder20_1  recorder20_2
variables created as string: 
recorder21_1  recorder21_2
variables created as string: 
recorder22_1  recorder22_2
variables created as string: 
recorder23_1  recorder23_2
variables created as string: 
recorder24_1  recorder24_2
variables created as string: 
recorder25_1  recorder25_2
variables created as string: 
recorder26_1  recorder26_2
variables created as string: 
recorder27_1  recorder27_2
variables created as string: 
recorder28_1  recorder28_2
variables created as string: 
recorder29_1  recorder29_2
variables created as string: 
recorder30_1  recorder30_2

. reshape long recorder wage, i(id_employer) j(order)
(j = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)

Data                               Wide   ->   Long
-----------------------------------------------------------------------------
Number of observations              577   ->   17,310      
Number of variables                  75   ->   18          
j variable (30 values)                    ->   order
xij variables:
     recorder1 recorder2 ... recorder30   ->   recorder
                 wage1 wage2 ... wage30   ->   wage
-----------------------------------------------------------------------------

. rename recorder id_profile

. destring id_profile, replace
id_profile: all characters numeric; replaced as int

. label var order "Order of Worker Profile"

. label var wage "Wage WTP"

. label var id_profile "ID of Profile"

. sort id_profile

. save "${clean}cleaned_employer.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/cleaned_employer.dta saved

. 
. **********************************
. * Merged Employer-Employee Dataset
. **********************************
. 
. * Merge the profiles to the worker file
. merge m:1 id_profile using "${clean}cleaned_worker.dta"
(variable id_profile was int, now float to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                            17,310  (_merge==3)
    -----------------------------------------

. tab _m

   Matching result from |
                  merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
            Matched (3) |     17,310      100.00      100.00
------------------------+-----------------------------------
                  Total |     17,310      100.00

. disp _N
17310

. drop _m

. 
. * Create tags for summarizing employer- & worker-invariant variables
. egen tag_worker = tag(id_worker)

. egen tag_employer=tag(id_employer)

. 
. * Construct a few more binary variables for the analysis
. foreach X in employer worker {
  2.     gen female_`X'=1 if gender_`X'=="Female"
  3.     replace female_`X'=0 if gender_`X'=="Male"
  4.     gen india_`X'=1 if country_`X'=="India"
  5.     replace india_`X'=0 if country_`X'=="United States"
  6.     gen old_`X'=1 if age_`X'>33 & age_`X'!=.
  7.     replace old_`X'=0 if age_`X'<=33 & age_`X'!=.
  8.     
.     label define female_`X' 1 "Female" 0 "Male"
  9.     label define india_`X' 1 "India" 0 "US"
 10.     label define old_`X' 1 "Over 33" 0 "Under 33"
 11.     label values female_`X' female_`X'
 12.     label values india_`X' india_`X'
 13.     label values old_`X' old_`X'
 14.     label var female_`X' "Female (Yes = 1)"
 15.     label var india_`X' "From India (Yes = 1)"
 16.     label var old_`X' "Over 33 (Yes = 1)"
 17. }
(10,320 missing values generated)
(10,320 real changes made)
(11,760 missing values generated)
(11,760 real changes made)
(10,020 missing values generated)
(10,020 real changes made)
(9,532 missing values generated)
(9,532 real changes made)
(11,485 missing values generated)
(11,485 real changes made)
(9,208 missing values generated)
(9,208 real changes made)

. 
. * Tea/Coffee Preference
. gen preftea_worker=1 if coffeetea=="Tea"
(10,566 missing values generated)

. replace preftea_worker=0 if coffeetea=="Coffee"
(10,566 real changes made)

. label var preftea_worker "Prefer Tea (Yes = 1)"

. 
. * Favorite High School Subject (binarize by whether or not they mention Math)
. tab subject, sort

             Favorite Subject |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
                      English |      1,622        9.37        9.37
                         Math |      1,190        6.87       16.24
                      Science |      1,117        6.45       22.70
                         math |      1,045        6.04       28.73
                      History |      1,043        6.03       34.76
                        Maths |        911        5.26       40.02
                      science |        842        4.86       44.89
                      english |        815        4.71       49.60
                      history |        799        4.62       54.21
                        maths |        594        3.43       57.64
                      Biology |        403        2.33       59.97
                      Physics |        315        1.82       61.79
                        TAMIL |        284        1.64       63.43
                    Chemistry |        282        1.63       65.06
                          Art |        280        1.62       66.68
                      physics |        240        1.39       68.06
                          art |        226        1.31       69.37
                    chemistry |        204        1.18       70.55
                        MATHS |        201        1.16       71.71
                    Geography |        191        1.10       72.81
                      SCIENCE |        179        1.03       73.85
                      ENGLISH |        149        0.86       74.71
                  Mathematics |        129        0.75       75.45
             COMPUTER SCIENCE |        118        0.68       76.14
                      biology |        117        0.68       76.81
                   Psychology |        111        0.64       77.45
             computer science |        108        0.62       78.08
                        music |         98        0.57       78.64
                     Science  |         97        0.56       79.20
                          gym |         88        0.51       79.71
                        choir |         84        0.49       80.20
                         MATH |         77        0.44       80.64
                        tamil |         77        0.44       81.09
             Computer Science |         61        0.35       81.44
                   Chemistry  |         60        0.35       81.79
                        drama |         60        0.35       82.13
                       French |         59        0.34       82.47
                    geography |         59        0.34       82.81
                  MATHEMATICS |         57        0.33       83.14
                     accounts |         54        0.31       83.45
                      Spanish |         53        0.31       83.76
                     History  |         52        0.30       84.06
                        lunch |         51        0.29       84.36
                  mathematics |         47        0.27       84.63
                           PE |         44        0.25       84.88
            Creative Writing  |         41        0.24       85.12
                       SOCIAL |         41        0.24       85.36
                         shop |         41        0.24       85.59
                       Civics |         40        0.23       85.82
            Foreign languages |         40        0.23       86.05
                       botany |         39        0.23       86.28
                       matchs |         39        0.23       86.50
             maths and social |         39        0.23       86.73
                        match |         37        0.21       86.94
                   psychology |         37        0.21       87.16
                     sc ience |         37        0.21       87.37
                     HISTORY  |         36        0.21       87.58
               home education |         35        0.20       87.78
                         ocpm |         35        0.20       87.98
                   Accounting |         34        0.20       88.18
                           14 |         33        0.19       88.37
                     Computer |         33        0.19       88.56
                  Engineering |         33        0.19       88.75
                     English  |         33        0.19       88.94
                   Journalism |         33        0.19       89.13
                        Math  |         33        0.19       89.32
                 Trigonometry |         33        0.19       89.51
                     carpenty |         33        0.19       89.71
                    computers |         33        0.19       89.90
                    COMMERCE  |         32        0.18       90.08
                      French  |         32        0.18       90.27
                   pyschology |         32        0.18       90.45
            COMPUTER NETWORKS |         31        0.18       90.63
             Computer science |         31        0.18       90.81
                       civics |         31        0.18       90.99
                  ACCOUNTANCE |         30        0.17       91.16
                   ACCOUTANCY |         30        0.17       91.33
             Business Studies |         30        0.17       91.51
                        Choir |         30        0.17       91.68
                    Economics |         30        0.17       91.85
                       Histor |         30        0.17       92.03
                           IT |         30        0.17       92.20
                  photography |         30        0.17       92.37
               social studies |         30        0.17       92.55
                  Accountancy |         29        0.17       92.72
                  Advertising |         29        0.17       92.88
                Earth Science |         29        0.17       93.05
                   Mathmatics |         29        0.17       93.22
Music, but non-music, English |         29        0.17       93.39
                     business |         29        0.17       93.55
                          lab |         29        0.17       93.72
                       social |         29        0.17       93.89
                      was/art |         29        0.17       94.06
                          ART |         28        0.16       94.22
                      Anatomy |         28        0.16       94.38
                   Geoghraphy |         28        0.16       94.54
                      HISTORY |         28        0.16       94.70
                      NIRMALA |         28        0.16       94.86
                   Statistics |         28        0.16       95.03
                      phys ed |         28        0.16       95.19
                    sociology |         28        0.16       95.35
                      ALgebra |         27        0.16       95.51
                  Mathametics |         27        0.16       95.66
                 Music Theory |         27        0.16       95.82
               Social Studies |         27        0.16       95.97
           english literature |         27        0.16       96.13
               home economics |         27        0.16       96.29
           mechanical drawing |         27        0.16       96.44
                     science  |         27        0.16       96.60
                           12 |         26        0.15       96.75
                       Botany |         26        0.15       96.90
                     COMPUTER |         26        0.15       97.05
                         MAth |         26        0.15       97.20
                      PHYSICS |         26        0.15       97.35
                   literature |         26        0.15       97.50
        science / electronics |         26        0.15       97.65
               Home economics |         25        0.14       97.79
                     Science! |         25        0.14       97.94
                      Theater |         25        0.14       98.08
        computer applications |         25        0.14       98.23
                     english  |         25        0.14       98.37
                           AV |         24        0.14       98.51
                   Literature |         24        0.14       98.65
                anthropology  |         24        0.14       98.79
                     history  |         23        0.13       98.92
                    mechanics |         23        0.13       99.05
                         Band |         22        0.13       99.18
                    CHEMISTRY |         22        0.13       99.31
Don't remember, too long ago. |         22        0.13       99.43
              Word Processing |         21        0.12       99.56
                      algebra |         21        0.12       99.68
                     robotics |         21        0.12       99.80
                      Nothing |         18        0.10       99.90
            Science/Chemistry |         17        0.10      100.00
------------------------------+-----------------------------------
                        Total |     17,310      100.00

. gen math_fav = strpos(lower(subject), "math") > 0

. tab math_fav

   math_fav |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     12,905       74.55       74.55
          1 |      4,405       25.45      100.00
------------+-----------------------------------
      Total |     17,310      100.00

. label var math_fav "Favorite Subject: Math"

. 
. * Favorite Sport (binarize by whether or not they mention football or soccer)
. tab sport, sort

                         Favorite Sport |      Freq.     Percent        Cum.
----------------------------------------+-----------------------------------
                               Football |      1,766       10.20       10.20
                               football |      1,741       10.06       20.26
                                Cricket |      1,259        7.27       27.53
                               Baseball |        832        4.81       32.34
                             basketball |        808        4.67       37.01
                             Basketball |        736        4.25       41.26
                                cricket |        732        4.23       45.49
                               baseball |        713        4.12       49.61
                                 soccer |        563        3.25       52.86
                                 tennis |        454        2.62       55.48
                                CRICKET |        428        2.47       57.95
                                 Hockey |        402        2.32       60.28
                                 Tennis |        361        2.09       62.36
                                 Soccer |        359        2.07       64.44
                                 hockey |        253        1.46       65.90
                             Volleyball |        222        1.28       67.18
                                   none |        188        1.09       68.27
                               swimming |        163        0.94       69.21
                               Cricket  |        150        0.87       70.08
                               Swimming |        148        0.85       70.93
                             gymnastics |        119        0.69       71.62
                                bowling |        115        0.66       72.28
                             volleyball |        110        0.64       72.92
                            Ice Skating |        107        0.62       73.54
                              Football  |         93        0.54       74.07
                                   golf |         93        0.54       74.61
                                fishing |         92        0.53       75.14
                                    MMA |         86        0.50       75.64
                                   Golf |         85        0.49       76.13
                               FOODBALL |         77        0.44       76.57
                        Track and field |         71        0.41       76.98
                                 TENNIS |         67        0.39       77.37
                            VALLEY BALL |         67        0.39       77.76
                              Foot ball |         65        0.38       78.13
                                cycling |         63        0.36       78.50
                                 HOCKEY |         60        0.35       78.84
                                 RUNING |         58        0.34       79.18
                               softball |         56        0.32       79.50
                          Skateboarding |         52        0.30       79.80
                                  chess |         51        0.29       80.10
                              Badminton |         50        0.29       80.39
                               FOOTBALL |         47        0.27       80.66
                                running |         47        0.27       80.93
                       College football |         44        0.25       81.18
                              FOOT BALL |         44        0.25       81.44
                              foot ball |         43        0.25       81.69
                                reading |         43        0.25       81.94
                                Surfing |         41        0.24       82.17
                      horse back riding |         40        0.23       82.40
                                  Track |         39        0.23       82.63
                              badmitton |         39        0.23       82.85
                               cricket  |         39        0.23       83.08
                            Basketball  |         38        0.22       83.30
running and yoga (not technically spo.. |         38        0.22       83.52
                           Bodybuilding |         37        0.21       83.73
                           Snowboarding |         37        0.21       83.95
                                 Squash |         37        0.21       84.16
                            Starcraft 2 |         37        0.21       84.37
                            settle hork |         37        0.21       84.59
                             BASKETBALL |         36        0.21       84.79
                              food ball |         36        0.21       85.00
                                 hiking |         36        0.21       85.21
                                Bowling |         35        0.20       85.41
                                 RINING |         35        0.20       85.62
                                TINNIES |         35        0.20       85.82
                         dont have one  |         35        0.20       86.02
                          nordic skiing |         35        0.20       86.22
                              Formula 1 |         34        0.20       86.42
                                   GAKI |         34        0.20       86.61
                              RING BOLL |         34        0.20       86.81
                            Shuttlecock |         34        0.20       87.01
                         figure skating |         34        0.20       87.20
                                tennies |         34        0.20       87.40
                                Kabaddi |         33        0.19       87.59
                  long distance running |         33        0.19       87.78
                                sailing |         33        0.19       87.97
                             vally ball |         33        0.19       88.16
                          Cross Country |         32        0.18       88.35
                                  HOCKY |         32        0.18       88.53
                             Ice Hockey |         32        0.18       88.72
                                 Kabadi |         32        0.18       88.90
                                Kho-kho |         32        0.18       89.09
                                 Nascar |         32        0.18       89.27
                                Running |         32        0.18       89.46
                          sport dancing |         32        0.18       89.64
                                  track |         32        0.18       89.83
                                  CHESS |         31        0.18       90.01
                                Hunting |         31        0.18       90.18
                                   None |         31        0.18       90.36
                                    UFC |         31        0.18       90.54
                              Vallyball |         31        0.18       90.72
                            Volley ball |         31        0.18       90.90
                              badminton |         31        0.18       91.08
                                KABBADI |         30        0.17       91.25
                                  KO KO |         30        0.17       91.43
                                    nfl |         30        0.17       91.60
                                Archery |         29        0.17       91.77
                         Figure skating |         29        0.17       91.94
                                 RANING |         29        0.17       92.10
                            Snow Skiing |         29        0.17       92.27
                           snowboarding |         29        0.17       92.44
                            volley ball |         29        0.17       92.61
                               Shooting |         28        0.16       92.77
                                 Tennid |         28        0.16       92.93
                              VALLYBALL |         28        0.16       93.09
                                    WWE |         28        0.16       93.25
                              Wrestling |         28        0.16       93.41
                           cheerleading |         28        0.16       93.58
                                footbal |         28        0.16       93.74
                                kabaddi |         28        0.16       93.90
                                  rugby |         28        0.16       94.06
  trail riding horses or mountain bikes |         28        0.16       94.22
                              wrestling |         28        0.16       94.38
                                   COCO |         27        0.16       94.54
                     Mixed Martial Arts |         27        0.16       94.70
                           Powerlifting |         27        0.16       94.85
                               Sleeping |         27        0.16       95.01
                                  hking |         27        0.16       95.16
                           martial arts |         27        0.16       95.32
                              BADMINTON |         26        0.15       95.47
                               BATMITON |         26        0.15       95.62
                             Badminton  |         26        0.15       95.77
                                Fishing |         26        0.15       95.92
                                KAPPATI |         26        0.15       96.07
                                    MLB |         26        0.15       96.22
                                 Racing |         26        0.15       96.37
                                 biking |         26        0.15       96.52
                         College hockey |         25        0.14       96.67
                              FOODBALL  |         25        0.14       96.81
                             Gymnastics |         25        0.14       96.96
                                  Hocky |         25        0.14       97.10
                                   MMA  |         25        0.14       97.24
                         Weight lifting |         25        0.14       97.39
                                 criket |         25        0.14       97.53
                               kayaking |         25        0.14       97.68
                                 sachin |         25        0.14       97.82
                                 skiing |         25        0.14       97.97
                                surfing |         25        0.14       98.11
                            BASKET BALL |         24        0.14       98.25
                             GYMNASTICS |         24        0.14       98.39
                            Racquebtall |         24        0.14       98.53
                       college football |         24        0.14       98.67
                               lacrosse |         24        0.14       98.80
                                Curling |         23        0.13       98.94
                                 HIking |         23        0.13       99.07
                                Kabbadi |         23        0.13       99.20
                            dog mushing |         23        0.13       99.34
                           Horse Racing |         22        0.13       99.46
                                    N/A |         22        0.13       99.59
                            basket ball |         21        0.12       99.71
                  ESports (Video Games) |         18        0.10       99.82
                                 kabadi |         17        0.10       99.91
                              FOOD BALL |         15        0.09      100.00
----------------------------------------+-----------------------------------
                                  Total |     17,310      100.00

. gen football_fav = strpos(lower(sport), "soccer") > 0

. replace football_fav = 1 if strpos(lower(sport), "football") > 0
(3,715 real changes made)

. tab football_fav

football_fa |
          v |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     12,673       73.21       73.21
          1 |      4,637       26.79      100.00
------------+-----------------------------------
      Total |     17,310      100.00

. label var football_fav "Favorite Sport: Football or Soccer"

. 
. * Favorite Color (binarize by whether or not they mention blue)
. tab color, sort

   Favorite |
      Color |      Freq.     Percent        Cum.
------------+-----------------------------------
       blue |      3,469       20.04       20.04
       Blue |      2,715       15.68       35.73
        red |      1,189        6.87       42.59
      green |      1,175        6.79       49.38
      Green |        960        5.55       54.93
        Red |        787        4.55       59.47
     purple |        671        3.88       63.35
     Purple |        613        3.54       66.89
       pink |        607        3.51       70.40
      black |        495        2.86       73.26
      Black |        487        2.81       76.07
       BLUE |        359        2.07       78.15
       PINK |        322        1.86       80.01
     yellow |        282        1.63       81.63
      White |        265        1.53       83.17
       Pink |        263        1.52       84.69
        RED |        215        1.24       85.93
     Orange |        197        1.14       87.07
      white |        180        1.04       88.11
      BLACK |        138        0.80       88.90
     Yellow |        129        0.75       89.65
     Silver |        103        0.60       90.24
       Teal |         95        0.55       90.79
     Black  |         92        0.53       91.32
      Brown |         80        0.46       91.79
      WHITE |         66        0.38       92.17
       Gray |         65        0.38       92.54
     silver |         64        0.37       92.91
        ROS |         63        0.36       93.28
       BINK |         61        0.35       93.63
     violet |         57        0.33       93.96
     Maroon |         54        0.31       94.27
       grey |         51        0.29       94.56
      GREEN |         45        0.26       94.82
    Purple  |         41        0.24       95.06
  Teal blue |         40        0.23       95.29
   Burgundy |         38        0.22       95.51
      64110 |         37        0.21       95.73
       teal |         36        0.21       95.93
        Bue |         35        0.20       96.14
  turquoise |         35        0.20       96.34
       Red  |         33        0.19       96.53
     orange |         33        0.19       96.72
 royal blue |         33        0.19       96.91
       BILK |         32        0.18       97.09
       Bkue |         32        0.18       97.28
     GREEN  |         32        0.18       97.46
   Lavander |         32        0.18       97.65
      Gray  |         31        0.18       97.83
        Tan |         31        0.18       98.01
       navy |         29        0.17       98.17
   SKY BLUE |         28        0.16       98.34
     green  |         28        0.16       98.50
  sea green |         28        0.16       98.66
dark purple |         27        0.16       98.82
      lilac |         27        0.16       98.97
   Sky Blue |         26        0.15       99.12
     ORANGE |         25        0.14       99.27
      PINK  |         25        0.14       99.41
      blue? |         25        0.14       99.56
  Navy Blue |         24        0.14       99.69
       Gold |         21        0.12       99.82
   lavender |         16        0.09       99.91
       none |         16        0.09      100.00
------------+-----------------------------------
      Total |     17,310      100.00

. gen blue_fav = strpos(lower(color), "blue") > 0

. tab blue_fav

   blue_fav |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     10,591       61.18       61.18
          1 |      6,719       38.82      100.00
------------+-----------------------------------
      Total |     17,310      100.00

. label var blue_fav "Favorite Color: Blue"

. 
. * Favorite Movie (binarize by whether or not they mention any movie noted by at least 5 people)
. tab movie if tag_worker == 1, sort

                         Favorite Movie |      Freq.     Percent        Cum.
----------------------------------------+-----------------------------------
                                Titanic |         18        3.06        3.06
                              Star Wars |         10        1.70        4.75
                                titanic |         10        1.70        6.45
                   Shawshank Redemption |          8        1.36        7.81
                              Inception |          6        1.02        8.83
                      Lord of the Rings |          5        0.85        9.68
                             The Matrix |          5        0.85       10.53
                     The Princess Bride |          5        0.85       11.38
                                 Avatar |          4        0.68       12.05
                          Black Panther |          4        0.68       12.73
                           Harry Potter |          4        0.68       13.41
                              SPIDERMAN |          4        0.68       14.09
                                 Aliens |          3        0.51       14.60
                           Blade Runner |          3        0.51       15.11
                           Forrest Gump |          3        0.51       15.62
                   Fried Green Tomatoes |          3        0.51       16.13
                             Goodfellas |          3        0.51       16.64
                       The Big Lebowski |          3        0.51       17.15
                          The Godfather |          3        0.51       17.66
                                 avatar |          3        0.51       18.17
                                 mersal |          3        0.51       18.68
                              star wars |          3        0.51       19.19
                                   thor |          3        0.51       19.69
                                      3 |          2        0.34       20.03
                          Almost Famous |          2        0.34       20.37
                                 Amelie |          2        0.34       20.71
                               Avengers |          2        0.34       21.05
                                 Batman |          2        0.34       21.39
                             Braveheart |          2        0.34       21.73
                             Casablanca |          2        0.34       22.07
                               Die Hard |          2        0.34       22.41
                        Dumb and Dumber |          2        0.34       22.75
                             Eraserhead |          2        0.34       23.09
  Eternal Sunshine of the Spotless Mind |          2        0.34       23.43
                                FRIENDS |          2        0.34       23.77
                             Fight Club |          2        0.34       24.11
                            Forest Gump |          2        0.34       24.45
                     Ghost in the Shell |          2        0.34       24.79
                              Gladiator |          2        0.34       25.13
                              In Bruges |          2        0.34       25.47
                                 Indian |          2        0.34       25.81
                          Jurassic Park |          2        0.34       26.15
                                   ROCK |          2        0.34       26.49
                                  Rocky |          2        0.34       26.83
                                   Roja |          2        0.34       27.16
                         Shutter Island |          2        0.34       27.50
                         Sound of Music |          2        0.34       27.84
                        Steel Magnolias |          2        0.34       28.18
                            TAMIL MOVIE |          2        0.34       28.52
                                  THERI |          2        0.34       28.86
                                    Ted |          2        0.34       29.20
                        The Dark Knight |          2        0.34       29.54
                           The Notebook |          2        0.34       29.88
               The Shawshank Redemption |          2        0.34       30.22
                            The Shining |          2        0.34       30.56
                                  Theri |          2        0.34       30.90
                                Tremors |          2        0.34       31.24
                            forest gump |          2        0.34       31.58
                              godfather |          2        0.34       31.92
                          groundhog day |          2        0.34       32.26
                           harry potter |          2        0.34       32.60
                                ice age |          2        0.34       32.94
                              inception |          2        0.34       33.28
                                 indian |          2        0.34       33.62
                          jurassic park |          2        0.34       33.96
                              kill bill |          2        0.34       34.30
                               scarface |          2        0.34       34.63
                   shawshank redemption |          2        0.34       34.97
                       ten commandments |          2        0.34       35.31
                              the thing |          2        0.34       35.65
                   2001 a space odyssey |          1        0.17       35.82
                             27 Dresses |          1        0.17       35.99
                               3 Idiots |          1        0.17       36.16
                               3 idiots |          1        0.17       36.33
                                    300 |          1        0.17       36.50
                            5th Element |          1        0.17       36.67
                  A League of their Own |          1        0.17       36.84
                             A New Hope |          1        0.17       37.01
                         A few good men |          1        0.17       37.18
                            A wednesday |          1        0.17       37.35
                             AASHIQUE 2 |          1        0.17       37.52
                                   ARAM |          1        0.17       37.69
                                AVATHAR |          1        0.17       37.86
                                 Action |          1        0.17       38.03
                               Airplane |          1        0.17       38.20
                              Airplane! |          1        0.17       38.37
                                  Alien |          1        0.17       38.54
All of the Twilight, Harry Potter, an.. |          1        0.17       38.71
                        American Beauty |          1        0.17       38.88
                         Apocolyspe Now |          1        0.17       39.05
                          BABYS DAY OUT |          1        0.17       39.22
                                    BIG |          1        0.17       39.39
                                   Babe |          1        0.17       39.56
                              Baby Boss |          1        0.17       39.73
                     Back to the Future |          1        0.17       39.90
                               Bahubali |          1        0.17       40.07
                                 Baraka |          1        0.17       40.24
                                Beaches |          1        0.17       40.41
                            Beetlejuice |          1        0.17       40.58
                            Being There |          1        0.17       40.75
                 Ben Hur (the original) |          1        0.17       40.92
                                   Bent |          1        0.17       41.09
                              Bhahubali |          1        0.17       41.26
                             Big Hero 6 |          1        0.17       41.43
                           Big Lebowski |          1        0.17       41.60
                        Black Hawk Down |          1        0.17       41.77
                          Black panther |          1        0.17       41.94
                            Bladerunner |          1        0.17       42.11
                        Blazing Saddles |          1        0.17       42.28
                            Blue Streak |          1        0.17       42.44
              Blue is the Warmest Color |          1        0.17       42.61
                        Bourne Identity |          1        0.17       42.78
                         Breakfast Club |          1        0.17       42.95
                                 CLERKS |          1        0.17       43.12
                              CONJURIAN |          1        0.17       43.29
                              Cast away |          1        0.17       43.46
                                 Comedy |          1        0.17       43.63
                              Conjuring |          1        0.17       43.80
                 Conversations With God |          1        0.17       43.97
                                   DDLJ |          1        0.17       44.14
                                    DUM |          1        0.17       44.31
                         Daddy's home 2 |          1        0.17       44.48
                     Dances With Wolves |          1        0.17       44.65
                     Dances with Wolves |          1        0.17       44.82
                            Dark Knight |          1        0.17       44.99
                            Deer Hunter |          1        0.17       45.16
                      Devil Wears Prada |          1        0.17       45.33
                             District 9 |          1        0.17       45.50
                                Dunkirk |          1        0.17       45.67
                                     ET |          1        0.17       45.84
                             Easy Rider |          1        0.17       46.01
                            Emoji Movie |          1        0.17       46.18
                         Empire Records |          1        0.17       46.35
                                 Endran |          1        0.17       46.52
                             Enemy Mine |          1        0.17       46.69
                               Eurotrip |          1        0.17       46.86
                            Expandables |          1        0.17       47.03
                       Fast and Furious |          1        0.17       47.20
                       Fast and furious |          1        0.17       47.37
                      Fast and furious  |          1        0.17       47.54
                                   Folk |          1        0.17       47.71
                                Freeway |          1        0.17       47.88
                           Fright Night |          1        0.17       48.05
                                 Gandhi |          1        0.17       48.22
                                Get Out |          1        0.17       48.39
              Girls Just Wanna Have Fun |          1        0.17       48.56
                             Gladiator  |          1        0.17       48.73
                               Gladitor |          1        0.17       48.90
                              Gone Girl |          1        0.17       49.07
                     Gone With the Wind |          1        0.17       49.24
                     Gone with the WInd |          1        0.17       49.41
                     Gone with the wind |          1        0.17       49.58
                            Goodfellas  |          1        0.17       49.75
                                Goonies |          1        0.17       49.92
                                 Grease |          1        0.17       50.08
                          Harlem Nights |          1        0.17       50.25
                    Harry Potter series |          1        0.17       50.42
                           Harry potter |          1        0.17       50.59
                               Heathers |          1        0.17       50.76
             Hedwig and the Angry Inch  |          1        0.17       50.93
                                    Her |          1        0.17       51.10
                     Herby goes bananas |          1        0.17       51.27
        Hitchhikers Guide to the Galaxy |          1        0.17       51.44
                                  Holes |          1        0.17       51.61
                             Home Alone |          1        0.17       51.78
                               Hoosiers |          1        0.17       51.95
                        Horrible Bosses |          1        0.17       52.12
                                 Horror |          1        0.17       52.29
                               Hot Fuzz |          1        0.17       52.46
         How the Grinch stole christmas |          1        0.17       52.63
                           Hunger Games |          1        0.17       52.80
                      I do not have one |          1        0.17       52.97
                     I don't have any.  |          1        0.17       53.14
                       I don't have one |          1        0.17       53.31
                      I don't have one. |          1        0.17       53.48
                                ICE AGE |          1        0.17       53.65
                         Identity Thief |          1        0.17       53.82
                                Inferno |          1        0.17       53.99
                           Interstellar |          1        0.17       54.16
                          Interstellar  |          1        0.17       54.33
                               Iron Man |          1        0.17       54.50
                                  JESUS |          1        0.17       54.67
                     JESUS CHRIST MOVIE |          1        0.17       54.84
                                JUMANGE |          1        0.17       55.01
                         JUSTICE LEAGUE |          1        0.17       55.18
             James Bond - Casino Royale |          1        0.17       55.35
                       Jeepers Creepers |          1        0.17       55.52
                                Jumanji |          1        0.17       55.69
                            Jungle Book |          1        0.17       55.86
                           Jurasik Park |          1        0.17       56.03
                          Jurassic park |          1        0.17       56.20
                          Jurrasic Park |          1        0.17       56.37
                                  KAYAL |          1        0.17       56.54
                            KOODIVEERAN |          1        0.17       56.71
                                  KOVIL |          1        0.17       56.88
                             KakkaKakka |          1        0.17       57.05
                              Lady Bird |          1        0.17       57.22
                         Legally blonde |          1        0.17       57.39
                          Les Miserable |          1        0.17       57.56
                      Lord Of The Rings |          1        0.17       57.72
                                   Love |          1        0.17       57.89
                          Love Actually |          1        0.17       58.06
                             Love Movie |          1        0.17       58.23
                                  Lucia |          1        0.17       58.40
                                  MAARI |          1        0.17       58.57
                                 MERSAL |          1        0.17       58.74
                                Mad Max |          1        0.17       58.91
                                 Matrix |          1        0.17       59.08
                         McFarlane, USA |          1        0.17       59.25
                                Memento |          1        0.17       59.42
                      Miller's Crossing |          1        0.17       59.59
                    Million Dollar Baby |          1        0.17       59.76
                        Minority Report |          1        0.17       59.93
                    Mission Impossible  |          1        0.17       60.10
                           Modern times |          1        0.17       60.27
                                   Moon |          1        0.17       60.44
                               NARNIYA  |          1        0.17       60.61
                                Newsies |          1        0.17       60.78
               Night of the Living Dead |          1        0.17       60.95
      No favorite but love Harry Potter |          1        0.17       61.12
             O' Brother Where Art Thou? |          1        0.17       61.29
                         Ocean's Eleven |          1        0.17       61.46
                        Ocean's eleven  |          1        0.17       61.63
                              Overboard |          1        0.17       61.80
                           PRETTY WOMAN |          1        0.17       61.97
                  PRIYATHA VARAM VENDUM |          1        0.17       62.14
                            Pacific Rim |          1        0.17       62.31
                         Pan's Labrynth |          1        0.17       62.48
                       Paris is Burning |          1        0.17       62.65
                      Passion Of Christ |          1        0.17       62.82
                         Paths of Glory |          1        0.17       62.99
                   Pirates of caribbean |          1        0.17       63.16
               Pirates of the Caribbean |          1        0.17       63.33
                           Pretty Woman |          1        0.17       63.50
                         Princess Bride |          1        0.17       63.67
                           Pulp Fiction |          1        0.17       63.84
                            Purple Rain |          1        0.17       64.01
                              RAJA RANI |          1        0.17       64.18
                Raiders of the Lost Ark |          1        0.17       64.35
                                  Rambo |          1        0.17       64.52
                                Rebecca |          1        0.17       64.69
                    Remember the Titans |          1        0.17       64.86
                    Requiem for a Dream |          1        0.17       65.03
                  Rise of the Guardians |          1        0.17       65.20
                              Rush Hour |          1        0.17       65.37
                                 SCREAM |          1        0.17       65.53
                   SHAWSHANK REDEMPTION |          1        0.17       65.70
                           Sargent York |          1        0.17       65.87
                                  Saved |          1        0.17       66.04
                    Saving Private Ryan |          1        0.17       66.21
                               Scarface |          1        0.17       66.38
                                 Scream |          1        0.17       66.55
                            Serendipity |          1        0.17       66.72
                                  Seven |          1        0.17       66.89
                        Sherlock holmes |          1        0.17       67.06
                                Sicario |          1        0.17       67.23
                                  Signs |          1        0.17       67.40
                   Silence of the Lambs |          1        0.17       67.57
                                 Snatch |          1        0.17       67.74
                       Someone like you |          1        0.17       67.91
                              Space Jam |          1        0.17       68.08
                             Spaceballs |          1        0.17       68.25
                              Spiderman |          1        0.17       68.42
                           Star Wars IV |          1        0.17       68.59
                              Star wars |          1        0.17       68.76
                               Stardust |          1        0.17       68.93
                               Stargate |          1        0.17       69.10
                          Step Brothers |          1        0.17       69.27
                       Superman Returns |          1        0.17       69.44
                             TAMI MOVIE |          1        0.17       69.61
                                TEMPER  |          1        0.17       69.78
                             TERMINATOR |          1        0.17       69.95
                     THE BOOK OF MASTER |          1        0.17       70.12
                                 THREE  |          1        0.17       70.29
                                  THRRI |          1        0.17       70.46
                       THe Color Purple |          1        0.17       70.63
                                TITANIC |          1        0.17       70.80
                                  Taken |          1        0.17       70.97
                       Ten Commandmends |          1        0.17       71.14
                           The Avengers |          1        0.17       71.31
                       The Big Libowski |          1        0.17       71.48
                     The Black Stallion |          1        0.17       71.65
                     The Breakfast Club |          1        0.17       71.82
                       The Dark Knight- |          1        0.17       71.99
                           The Departed |          1        0.17       72.16
                         The Evil Woods |          1        0.17       72.33
                           The Fugitive |          1        0.17       72.50
            The Good, the Bad, the Ugly |          1        0.17       72.67
                            The Goonies |          1        0.17       72.84
                           The Graudate |          1        0.17       73.01
                         The Green Mile |          1        0.17       73.17
                           The Hangover |          1        0.17       73.34
                               The Help |          1        0.17       73.51
                    The Incredible Hulk |          1        0.17       73.68
                        The Iron Gianyt |          1        0.17       73.85
                          The Lion King |          1        0.17       74.02
                    The Little Mermaid  |          1        0.17       74.19
                The Long Kiss Goodnight |          1        0.17       74.36
                        The Longest Day |          1        0.17       74.53
                  The Lord of the Rings |          1        0.17       74.70
                            The Matrix  |          1        0.17       74.87
                          The Money Pit |          1        0.17       75.04
          The Pirates of the Caribbean  |          1        0.17       75.21
                               The Ride |          1        0.17       75.38
                  The Thing Called Love |          1        0.17       75.55
                       The Transformers |          1        0.17       75.72
                     The Usual Suspects |          1        0.17       75.89
                       The book of Eli  |          1        0.17       76.06
                  The devil wears prada |          1        0.17       76.23
              The good the bad the ugly |          1        0.17       76.40
                        The jungle book |          1        0.17       76.57
                           The punisher |          1        0.17       76.74
                    The shape of water  |          1        0.17       76.91
                                 Theran |          1        0.17       77.08
                   There will be blood  |          1        0.17       77.25
                        Thiruda Thiruda |          1        0.17       77.42
                                   Thor |          1        0.17       77.59
                               Titanitc |          1        0.17       77.76
                  To Kill a Mockingbird |          1        0.17       77.93
                              Tombstone |          1        0.17       78.10
                                Top Gun |          1        0.17       78.27
                              Toy Story |          1        0.17       78.44
                           Transformers |          1        0.17       78.61
           Transformers the last knight |          1        0.17       78.78
          Transformers: The Last Knight |          1        0.17       78.95
                       Twelve angry men |          1        0.17       79.12
                              Twilight  |          1        0.17       79.29
                          ULLAMNINAITHU |          1        0.17       79.46
                            Unbreakable |          1        0.17       79.63
                         V for vendetta |          1        0.17       79.80
                            VALVE MAYAM |          1        0.17       79.97
                              VASIKARA  |          1        0.17       80.14
                                 VEERAM |          1        0.17       80.31
                               VIVEGAM  |          1        0.17       80.48
                                   Vali |          1        0.17       80.65
                        Victor/Victoria |          1        0.17       80.81
                      View From the Top |          1        0.17       80.98
                               Waitress |          1        0.17       81.15
                                 Wanted |          1        0.17       81.32
         War for the Planet of the Apes |          1        0.17       81.49
                               Warriors |          1        0.17       81.66
                          Wayne's World |          1        0.17       81.83
                       Wedding Crashers |          1        0.17       82.00
                   When Harry Met Sally |          1        0.17       82.17
                      Where eagles dare |          1        0.17       82.34
                Who Framed Roger Rabbit |          1        0.17       82.51
                          Wild at Heart |          1        0.17       82.68
                           Wizard of OZ |          1        0.17       82.85
                                 Wonder |          1        0.17       83.02
                      a christmas story |          1        0.17       83.19
                                 action |          1        0.17       83.36
                               airplane |          1        0.17       83.53
                    alice in wonderland |          1        0.17       83.70
                          almost famous |          1        0.17       83.87
                              annamalai |          1        0.17       84.04
                        anything marvel |          1        0.17       84.21
                             armageddon |          1        0.17       84.38
                                  aruvi |          1        0.17       84.55
                               avangers |          1        0.17       84.72
                               avengers |          1        0.17       84.89
                                   babe |          1        0.17       85.06
                                bairava |          1        0.17       85.23
                           blac panther |          1        0.17       85.40
                          blair witch 2 |          1        0.17       85.57
                  bridget jones's diary |          1        0.17       85.74
                    catch me if you can |          1        0.17       85.91
                              cinderlla |          1        0.17       86.08
                        cinema paradiso |          1        0.17       86.25
                            city lights |          1        0.17       86.42
                                 closer |          1        0.17       86.59
                                   clue |          1        0.17       86.76
                              conjuring |          1        0.17       86.93
                            dark knight |          1        0.17       87.10
                               die hard |          1        0.17       87.27
                          dirty dancing |          1        0.17       87.44
                          dont have one |          1        0.17       87.61
                                english |          1        0.17       87.78
                               enthiran |          1        0.17       87.95
                       eternal sunshine |          1        0.17       88.12
  eternal sunshine of the spotless mind |          1        0.17       88.29
                              ferngully |          1        0.17       88.46
                            first blood |          1        0.17       88.62
                                 frends |          1        0.17       88.79
                              frequency |          1        0.17       88.96
                                friends |          1        0.17       89.13
                             funny face |          1        0.17       89.30
                               g.i. joe |          1        0.17       89.47
                     gone with the wind |          1        0.17       89.64
                                 grease |          1        0.17       89.81
                               hangover |          1        0.17       89.98
                           hunger games |          1        0.17       90.15
i dont like movies always fall asleep.. |          1        0.17       90.32
                           interstellar |          1        0.17       90.49
                                   iron |          1        0.17       90.66
                                   jaws |          1        0.17       90.83
                          jerry maguire |          1        0.17       91.00
                              jodaakbar |          1        0.17       91.17
                              john wick |          1        0.17       91.34
                               karuppan |          1        0.17       91.51
                              lala land |          1        0.17       91.68
                     lawrence of arabia |          1        0.17       91.85
                              lion king |          1        0.17       92.02
                          lord of rings |          1        0.17       92.19
                      lord of the rings |          1        0.17       92.36
                                   lotr |          1        0.17       92.53
                         love actually  |          1        0.17       92.70
                                 matrix |          1        0.17       92.87
                         matrix reloded |          1        0.17       93.04
                                    mib |          1        0.17       93.21
                           moulin rouge |          1        0.17       93.38
                                 nikita |          1        0.17       93.55
                            none really |          1        0.17       93.72
                              oceans 11 |          1        0.17       93.89
                             paper moon |          1        0.17       94.06
                                pirates |          1        0.17       94.23
                                  porky |          1        0.17       94.40
                                 premam |          1        0.17       94.57
                                  racer |          1        0.17       94.74
                                   raja |          1        0.17       94.91
                                    ram |          1        0.17       95.08
                                romance |          1        0.17       95.25
                              romantic  |          1        0.17       95.42
                               rounders |          1        0.17       95.59
            salmon fishing in the yemen |          1        0.17       95.76
                               shinning |          1        0.17       95.93
                                 singam |          1        0.17       96.10
                                  speed |          1        0.17       96.26
                             spider man |          1        0.17       96.43
                              spiderman |          1        0.17       96.60
                       star wars series |          1        0.17       96.77
                            story of us |          1        0.17       96.94
                        the Right Stuff |          1        0.17       97.11
                            the croods  |          1        0.17       97.28
                          the godfather |          1        0.17       97.45
                             the jacket |          1        0.17       97.62
                      the lord of rings |          1        0.17       97.79
                             the matrix |          1        0.17       97.96
                          the note book |          1        0.17       98.13
                       the painted veil |          1        0.17       98.30
                           the prestige |          1        0.17       98.47
                         the two towers |          1        0.17       98.64
                                tic tic |          1        0.17       98.81
                 to many to choose from |          1        0.17       98.98
                              toy story |          1        0.17       99.15
                               twilight |          1        0.17       99.32
                                 vinner |          1        0.17       99.49
                    wolf of wall street |          1        0.17       99.66
                                  x-men |          1        0.17       99.83
                              zoolander |          1        0.17      100.00
----------------------------------------+-----------------------------------
                                  Total |        589      100.00

. gen popmovie_fav = strpos(lower(movie), "titanic") > 0

. replace popmovie_fav = 1 if strpos(lower(movie), "star wars") > 0
(478 real changes made)

. replace popmovie_fav = 1 if strpos(lower(movie), "shawshank") > 0
(410 real changes made)

. replace popmovie_fav = 1 if strpos(lower(movie), "avatar") > 0
(199 real changes made)

. replace popmovie_fav = 1 if strpos(lower(movie), "inception") > 0
(257 real changes made)

. replace popmovie_fav = 1 if strpos(lower(movie), "rings") > 0
(325 real changes made)

. replace popmovie_fav = 1 if strpos(lower(movie), "matrix") > 0
(313 real changes made)

. replace popmovie_fav = 1 if strpos(lower(movie), "princess bride") > 0
(181 real changes made)

. tab popmovie_fav if tag_worker==1

popmovie_fa |
          v |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |        489       83.02       83.02
          1 |        100       16.98      100.00
------------+-----------------------------------
      Total |        589      100.00

. label var popmovie_fav "Favorite Movie: Popular Movie"

. 
. * Demographics of Employer
. gen educ_college_employer=1 

. replace educ_college_employer=0 if education_employer=="Some College" | ///
>   education_employer=="Vocational/Technical School (2 year)" | ///
>   education_employer=="High School or equivalent" | ///
>   education_employer=="Less than High School"
(5,730 real changes made)

. label var educ_college_employer "College Education or Above"

. 
. * Divide survey duration variables so they are in minutes instead of seconds
. for X in any worker employer: replace duration_X=duration_X/60

->  replace duration_worker=duration_worker/60
variable duration_worker was int now float
(17,310 real changes made)

->  replace duration_employer=duration_employer/60
variable duration_employer was int now float
(17,310 real changes made)

. label var duration_worker "Survey 1 Duration (Minutes)"

. label var duration_employer "Survey 2 Duration (Minutes)"

. 
. label var incentivized "Predictions Incentivized"

. gen hiringtask_number=1 if order<=20
(5,770 missing values generated)

. replace hiringtask_number=2 if order>20 & order<=30
(5,770 real changes made)

. label var hiringtask_number "Hiring Task Number (1 = Before Info)"

. label var pred_female "Prediction for Female Workers"

. label var pred_male "Prediction for Male Workers"

. label var pred_india "Prediction for Indian Workers"

. label var pred_us "Prediction for US Workers"

. label var pred_old "Prediction for Over Workers"

. label var pred_young "Prediction for Under 33 Workers"

. 
. save "${clean}inaccurate_discrimination.dta", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Data-Extract/inaccurate_discrimination.dta saved

. 
. ****************************************************************************************************
. *  2. Main Paper Tables and Figures
. ****************************************************************************************************
. 
. use "${clean}inaccurate_discrimination.dta", clear

. 
. * Table 2: Wages & Productivities for Hiring Task 1
. foreach X in female india old {
  2.     foreach Y in wage score {
  3.         ttest `Y' if hiringtask==1, by(`X'_worker)
  4.         local `Y'_mean0_`X'=r(mu_1)
  5.         local `Y'_mean1_`X'=r(mu_2)
  6.         local `Y'_sd0_`X'=r(sd_1)
  7.         local `Y'_sd1_`X'=r(sd_2)
  8.         local `Y'_N0_`X'=r(N_1)
  9.         local `Y'_N1_`X'=r(N_2)
 10.                 reghdfe `Y' `X'_worker if hiringtask==1, cluster(id_employer id_worker) noabsorb
 11.             mat mytable = r(table)
 12.         local `Y'_diff_`X'=-1*mytable[1,1]
 13.         local `Y'_p_`X'=mytable[4,1]
 14.         
.         local `Y'_mean0_`X': di %6.2f ``Y'_mean0_`X'' 
 15.         local `Y'_sd0_`X': di %6.2f ``Y'_sd0_`X'' 
 16.         local `Y'_N0_`X': di %12.0gc ``Y'_N0_`X'' 
 17.         local `Y'_mean1_`X': di %6.2f ``Y'_mean1_`X'' 
 18.         local `Y'_sd1_`X': di %6.2f ``Y'_sd1_`X'' 
 19.         local `Y'_N1_`X': di %12.0gc ``Y'_N1_`X'' 
 20.         local `Y'_p_`X': di %6.2f ``Y'_p_`X'' 
 21.         local `Y'_diff_`X': di %6.2f ``Y'_diff_`X'' 
 22.     }
 23. }

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
    Male |   6,306    31.89758    .1520561    12.07482     31.5995    32.19566
  Female |   5,234    30.84804    .1690719    12.23174    30.51659    31.17949
---------+--------------------------------------------------------------------
Combined |  11,540    31.42156    .1131675    12.15695    31.19973    31.64338
---------+--------------------------------------------------------------------
    diff |            1.049539    .2271177                .6043495    1.494728
------------------------------------------------------------------------------
    diff = mean(Male) - mean(Female)                              t =   4.6211
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 1.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 0.0000
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =       7.83
Statistics robust to heteroskedasticity           Prob > F        =     0.0053
                                                  R-squared       =     0.0018
                                                  Adj R-squared   =     0.0018
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0018
Number of clusters (id_worker) =        589       Root MSE        =    12.1462

                 (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------
              |               Robust
         wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------+----------------------------------------------------------------
female_worker |  -1.049539   .3750316    -2.80   0.005    -1.786135   -.3129425
        _cons |   31.89758   .4141395    77.02   0.000     31.08417    32.71099
-------------------------------------------------------------------------------

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
    Male |   6,306    38.29575    .1077198    8.554063    38.08458    38.50692
  Female |   5,234    34.97669    .1207002    8.732222    34.74007    35.21331
---------+--------------------------------------------------------------------
Combined |  11,540    36.79038    .0818402    8.791634    36.62996     36.9508
---------+--------------------------------------------------------------------
    diff |            3.319059    .1614684                3.002554    3.635565
------------------------------------------------------------------------------
    diff = mean(Male) - mean(Female)                              t =  20.5555
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 1.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 0.0000
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =      20.21
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0353
                                                  Adj R-squared   =     0.0352
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0353
Number of clusters (id_worker) =        589       Root MSE        =     8.6353

                 (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------
              |               Robust
        score | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------+----------------------------------------------------------------
female_worker |  -3.319059   .7383027    -4.50   0.000    -4.769153   -1.868966
        _cons |   38.29575    .489446    78.24   0.000     37.33443    39.25707
-------------------------------------------------------------------------------

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
      US |   7,700    30.71042    .1390038    12.19754    30.43793     30.9829
   India |   3,840    32.84754    .1928403    11.94987    32.46946    33.22562
---------+--------------------------------------------------------------------
Combined |  11,540    31.42156    .1131675    12.15695    31.19973    31.64338
---------+--------------------------------------------------------------------
    diff |           -2.137125    .2393537               -2.606299   -1.667951
------------------------------------------------------------------------------
    diff = mean(US) - mean(India)                                 t =  -8.9287
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 1.0000
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =      26.82
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0069
                                                  Adj R-squared   =     0.0068
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0069
Number of clusters (id_worker) =        589       Root MSE        =    12.1157

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
        wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
india_worker |   2.137125   .4126746     5.18   0.000     1.326594    2.947655
       _cons |   30.71042   .4072324    75.41   0.000     29.91058    31.51026
------------------------------------------------------------------------------

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
      US |   7,700    37.00597    .1018027    8.933154    36.80641    37.20554
   India |   3,840    36.35807    .1369328    8.485414     36.0896    36.62654
---------+--------------------------------------------------------------------
Combined |  11,540    36.79038    .0818402    8.791634    36.62996     36.9508
---------+--------------------------------------------------------------------
    diff |            .6479011    .1735874                .3076403     .988162
------------------------------------------------------------------------------
    diff = mean(US) - mean(India)                                 t =   3.7324
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.9999         Pr(|T| > |t|) = 0.0002          Pr(T > t) = 0.0001
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =       0.69
Statistics robust to heteroskedasticity           Prob > F        =     0.4079
                                                  R-squared       =     0.0012
                                                  Adj R-squared   =     0.0011
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0012
Number of clusters (id_worker) =        589       Root MSE        =     8.7867

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
       score | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
india_worker |  -.6479011   .7823047    -0.83   0.408    -2.184419    .8886165
       _cons |   37.00597   .4648426    79.61   0.000     36.09298    37.91897
------------------------------------------------------------------------------

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
Under 33 |   6,139    31.67262    .1531807    12.00198    31.37233    31.97291
 Over 33 |   5,401    31.13619    .1677152    12.32564     30.8074    31.46498
---------+--------------------------------------------------------------------
Combined |  11,540    31.42156    .1131675    12.15695    31.19973    31.64338
---------+--------------------------------------------------------------------
    diff |            .5364262    .2267541                .0919497    .9809028
------------------------------------------------------------------------------
    diff = mean(Under 33) - mean(Over 33)                         t =   2.3657
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.9910         Pr(|T| > |t|) = 0.0180          Pr(T > t) = 0.0090
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =       1.88
Statistics robust to heteroskedasticity           Prob > F        =     0.1709
                                                  R-squared       =     0.0005
                                                  Adj R-squared   =     0.0004
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0005
Number of clusters (id_worker) =        589       Root MSE        =    12.1545

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
        wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  old_worker |  -.5364262   .3912425    -1.37   0.171    -1.304862    .2320096
       _cons |   31.67262   .4095437    77.34   0.000     30.86824      32.477
------------------------------------------------------------------------------

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
Under 33 |   6,139    36.96156    .1100297    8.621025    36.74586    37.17725
 Over 33 |   5,401    36.59582      .12217    8.978458    36.35631    36.83532
---------+--------------------------------------------------------------------
Combined |  11,540    36.79038    .0818402    8.791634    36.62996     36.9508
---------+--------------------------------------------------------------------
    diff |            .3657417    .1639879                .0442976    .6871858
------------------------------------------------------------------------------
    diff = mean(Under 33) - mean(Over 33)                         t =   2.2303
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.9871         Pr(|T| > |t|) = 0.0257          Pr(T > t) = 0.0129
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =       0.24
Statistics robust to heteroskedasticity           Prob > F        =     0.6276
                                                  R-squared       =     0.0004
                                                  Adj R-squared   =     0.0003
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0004
Number of clusters (id_worker) =        589       Root MSE        =     8.7901

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
       score | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  old_worker |  -.3657417   .7535764    -0.49   0.628    -1.845834    1.114351
       _cons |   36.96156    .502913    73.49   0.000     35.97379    37.94932
------------------------------------------------------------------------------

. global label_female "Gender (1 = Male, 2 = Female)"

. global label_india "Country (1 = US, 2 = India)"

. global label_old "Age (1 = Under 33, 2 = Over 33)"

. 
. texdoc init "${tables}table2.tex", replace
(texdoc output file is /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Arc
> hive/Results/tables/table2.tex)

. tex {\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}

. tex \begin{tabular}{l*{6}{c}}

. tex \hline\hline

. tex &\textbf{Group 1}& \textbf{Group 2}&\textbf{Diff.} &\textbf{p-val} & \textbf{\#Obs. G1}  & \textbf
> {\#Obs. G2}\\

. tex &\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}&\multicolumn{1}{c}{(3)}& ///
>   \multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}&\multicolumn{1}{c}{(6)}\\

. tex \hline

. tex \multicolumn{6}{l}{\textbf{Panel A: Employers' Wage WTP, by Employee Characteristics}}\\      

. foreach X in female india old {
  2.     tex ${label_`X'} & `wage_mean0_`X'' & `wage_mean1_`X'' & `wage_diff_`X'' & `wage_p_`X'' ///
>       & `wage_N0_`X'' & `wage_N1_`X'' \\
  3.     tex              & (`wage_sd0_`X'') & (`wage_sd1_`X'') &            &         &          &   \\
  4. }   

. tex &&&&&&\\                                             

. tex \multicolumn{6}{l}{\textbf{Panel B: Employee Productivity, by Employee Characteristics}}\\        
>  

. foreach X in female india old {
  2.     tex ${label_`X'} & `score_mean0_`X'' & `score_mean1_`X'' & `score_diff_`X'' & `score_p_`X'' ///
>       & `score_N0_`X'' & `score_N1_`X'' \\
  3.     tex              & (`score_sd0_`X'') & (`score_sd1_`X'') &            &         &          &  \
> \
  4. }   

. tex \hline\hline

. tex \end{tabular}}    

. texdoc close
(texdoc output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication 
> Archive/Results/tables/table2.tex)

. 
. * Table 3: Beliefs about Productivity by Employee Characteristics
. ttest pred_male=pred_female if tag_employer==1

Paired t test
------------------------------------------------------------------------------
Variable |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
pred_m~e |     577    34.03666    .3438166    8.258757    33.36137    34.71194
pred_f~e |     577    32.14471    .3502566    8.413453    31.45678    32.83265
---------+--------------------------------------------------------------------
    diff |     577    1.891941    .2364623    5.680019    1.427508    2.356375
------------------------------------------------------------------------------
     mean(diff) = mean(pred_male - pred_female)                   t =   8.0010
 H0: mean(diff) = 0                              Degrees of freedom =      576

 Ha: mean(diff) < 0           Ha: mean(diff) != 0           Ha: mean(diff) > 0
 Pr(T < t) = 1.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 0.0000

. local pred_mean0_female=r(mu_1)

. local pred_mean1_female=r(mu_2)

. local pred_sd0_female=r(sd_1)

. local pred_sd1_female=r(sd_2)

. local pred_diff_female=r(mu_1)-r(mu_2)

. local pred_p_female=r(p)

. 
. ttest pred_us=pred_india if tag_employer==1

Paired t test
------------------------------------------------------------------------------
Variable |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
 pred_us |     577    32.07591    .3562374    8.557117    31.37623    32.77559
pred_i~a |     577        34.8    .3928682    9.437018    34.02837    35.57163
---------+--------------------------------------------------------------------
    diff |     577    -2.72409    .3268432    7.851044    -3.36604    -2.08214
------------------------------------------------------------------------------
     mean(diff) = mean(pred_us - pred_india)                      t =  -8.3345
 H0: mean(diff) = 0                              Degrees of freedom =      576

 Ha: mean(diff) < 0           Ha: mean(diff) != 0           Ha: mean(diff) > 0
 Pr(T < t) = 0.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 1.0000

. local pred_mean0_india=r(mu_1)

. local pred_mean1_india=r(mu_2)

. local pred_sd0_india=r(sd_1)

. local pred_sd1_india=r(sd_2)

. local pred_diff_india=r(mu_1)-r(mu_2)

. local pred_p_india=r(p)

. 
. ttest pred_young=pred_old if tag_employer==1

Paired t test
------------------------------------------------------------------------------
Variable |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
pred_y~g |     577    33.40919    .3732899    8.966732    32.67601    34.14236
pred_old |     577     31.5713    .3746324    8.998979    30.83549    32.30711
---------+--------------------------------------------------------------------
    diff |     577    1.837886    .2919933    7.013919    1.264384    2.411387
------------------------------------------------------------------------------
     mean(diff) = mean(pred_young - pred_old)                     t =   6.2943
 H0: mean(diff) = 0                              Degrees of freedom =      576

 Ha: mean(diff) < 0           Ha: mean(diff) != 0           Ha: mean(diff) > 0
 Pr(T < t) = 1.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 0.0000

. local pred_mean0_old=r(mu_1)

. local pred_mean1_old=r(mu_2)

. local pred_sd0_old=r(sd_1)

. local pred_sd1_old=r(sd_2)

. local pred_diff_old=r(mu_1)-r(mu_2)

. local pred_p_old=r(p)

. 
. foreach X in female india old {
  2.     foreach Y in mean0 mean1 sd0 sd1 diff p{
  3.         local pred_`Y'_`X': di %6.2f `pred_`Y'_`X'' 
  4.     } 
  5. }

. 
. texdoc init "${tables}table3.tex", replace  
(texdoc output file is /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Arc
> hive/Results/tables/table3.tex)

. tex {\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}

. tex \begin{tabular}{l*{4}{c}}

. tex \hline\hline

. tex &\textbf{Group 1}&\textbf{Group 2}&\textbf{Diff.} &\textbf{p-val}\\

. tex & (1) & (2) & (3) & (4)\\

. tex \hline

. foreach X in female india old {
  2.     tex ${label_`X'} & `pred_mean0_`X'' & `pred_mean1_`X'' & `pred_diff_`X'' & `pred_p_`X'' \\
  3.     tex              & (`pred_sd0_`X'') & (`pred_sd1_`X'') &            &            \\
  4. }   

. tex \hline\hline

. tex \end{tabular}}    

. texdoc close
(texdoc output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication 
> Archive/Results/tables/table3.tex)

. 
. * Table 4: Effect of Information - (Difference-in-Differences by Hiring Task)
. gen postinfo=hiringtask==2

. for Y in any female_worker india_worker old_worker: gen postinfoXY=postinfo*Y

->  gen postinfoXfemale_worker=postinfo*female_worker

->  gen postinfoXindia_worker=postinfo*india_worker

->  gen postinfoXold_worker=postinfo*old_worker

. 
. eststo M1: reghdfe wage female_worker postinfo postinfoXfemale_worker, ///
>   cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   3,    576) =      15.70
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0055
                                                  Adj R-squared   =     0.0053
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0055
Number of clusters (id_worker) =        589       Root MSE        =    11.8140

                          (Std. err. adjusted for 577 clusters in id_employer id_worker)
----------------------------------------------------------------------------------------
                       |               Robust
                  wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-----------------------+----------------------------------------------------------------
         female_worker |  -1.049539   .3750479    -2.80   0.005    -1.786167   -.3129106
              postinfo |   1.526306   .3085839     4.95   0.000     .9202187    2.132392
postinfoXfemale_worker |  -.6439247   .3779556    -1.70   0.089    -1.386264    .0984144
                 _cons |   31.89758   .4141575    77.02   0.000     31.08414    32.71102
----------------------------------------------------------------------------------------

.  sum wage if e(sample) & female_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,306    31.89758    12.07482          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.897578

. eststo M2: reghdfe wage india_worker postinfo postinfoXindia_worker, ///
>   cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   3,    576) =      19.57
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0079
                                                  Adj R-squared   =     0.0078
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0079
Number of clusters (id_worker) =        589       Root MSE        =    11.7995

                         (Std. err. adjusted for 577 clusters in id_employer id_worker)
---------------------------------------------------------------------------------------
                      |               Robust
                 wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
----------------------+----------------------------------------------------------------
         india_worker |   2.137125   .4126924     5.18   0.000     1.326559     2.94769
             postinfo |   1.600014   .2728984     5.86   0.000     1.064017    2.136011
postinfoXindia_worker |  -1.070901   .4258522    -2.51   0.012    -1.907313   -.2344881
                _cons |   30.71042     .40725    75.41   0.000     29.91054    31.51029
---------------------------------------------------------------------------------------

.  sum wage if e(sample) & india_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      7,700    30.71042    12.19754          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.710417

. eststo M3: reghdfe wage old_worker postinfo postinfoXold_worker, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   3,    576) =       8.87
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0028
                                                  Adj R-squared   =     0.0027
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0028
Number of clusters (id_worker) =        589       Root MSE        =    11.8296

                       (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------------
                    |               Robust
               wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------------+----------------------------------------------------------------
         old_worker |  -.5364262   .3912595    -1.37   0.171    -1.304895    .2320429
           postinfo |   1.063593   .3082804     3.45   0.001      .458102    1.669084
postinfoXold_worker |   .4104379   .4171025     0.98   0.326    -.4087894    1.229665
              _cons |   31.67262   .4095615    77.33   0.000      30.8682    32.47703
-------------------------------------------------------------------------------------

.  sum wage if e(sample) & old_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,139    31.67262    12.00198          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.672618

. eststo M4: reghdfe wage female_worker india_worker old_worker postinfo ///
>    postinfoXfemale_worker postinfoXindia_worker postinfoXold_worker, ///
>    cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   7,    576) =      10.79
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0098
                                                  Adj R-squared   =     0.0094
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0098
Number of clusters (id_worker) =        589       Root MSE        =    11.7897

                          (Std. err. adjusted for 577 clusters in id_employer id_worker)
----------------------------------------------------------------------------------------
                       |               Robust
                  wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-----------------------+----------------------------------------------------------------
         female_worker |  -.6559822   .3732116    -1.76   0.079    -1.389004    .0770394
          india_worker |   2.011497   .4325364     4.65   0.000     1.161956    2.861038
            old_worker |   .0593558   .3909682     0.15   0.879    -.7085413    .8272529
              postinfo |   1.973596    .394589     5.00   0.000     1.198588    2.748605
postinfoXfemale_worker |  -.8937331   .3804801    -2.35   0.019    -1.641031   -.1464355
 postinfoXindia_worker |  -1.197357   .4365695    -2.74   0.006    -2.054819   -.3398948
   postinfoXold_worker |   .1219369   .4209261     0.29   0.772    -.7048004    .9486741
                 _cons |   31.02196   .4973229    62.38   0.000     30.04518    31.99875
----------------------------------------------------------------------------------------

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. eststo M5: reghdfe wage female_worker india_worker old_worker postinfo ///
>    postinfoXfemale_worker postinfoXindia_worker postinfoXold_worker, ///
>    abs(id_employer) cluster(id_employer id_worker)
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   7,    576) =      14.49
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.4755
                                                  Adj R-squared   =     0.4573
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0184
Number of clusters (id_worker) =        589       Root MSE        =     8.7266

                          (Std. err. adjusted for 577 clusters in id_employer id_worker)
----------------------------------------------------------------------------------------
                       |               Robust
                  wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-----------------------+----------------------------------------------------------------
         female_worker |  -.7972794   .3299796    -2.42   0.016    -1.445389   -.1491695
          india_worker |   2.016127   .3836411     5.26   0.000     1.262621    2.769633
            old_worker |   .2891246   .3506602     0.82   0.410     -.399604    .9778532
              postinfo |   2.331405   .3417843     6.82   0.000      1.66011    3.002701
postinfoXfemale_worker |  -1.007013   .2891145    -3.48   0.001     -1.57486    -.439166
 postinfoXindia_worker |  -1.645537   .3290897    -5.00   0.000    -2.291899   -.9991752
   postinfoXold_worker |  -.2103353   .3065714    -0.69   0.493    -.8124695    .3917989
                 _cons |   30.97697   .3227007    95.99   0.000     30.34316    31.61078
----------------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------+
 Absorbed FE | Categories  - Redundant  = Num. Coefs |
-------------+---------------------------------------|
 id_employer |       577         577           0    *|
-----------------------------------------------------+
* = FE nested within cluster; treated as redundant for DoF computation

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. 
. #d ;
delimiter now ;
. esttab M1 M2 M3 M4 M5 using "${tables}table4.tex", replace 
>   keep(postinfo female_worker postinfoXfemale_worker india_worker postinfoXindia_worker 
>   old_worker postinfoXold_worker)
>   order(postinfo female_worker postinfoXfemale_worker india_worker postinfoXindia_worker 
>   old_worker postinfoXold_worker)
>   coeflabel(postinfo "Post-Info" female_worker "Female" postinfoXfemale_worker "Female X Post-Info"
>   india_worker "Indian" postinfoXindia_worker "Indian X Post-Info" old_worker "Over 33"
>   postinfoXold_worker "Over 33 X Post-Info") cells(b(star fmt(2)) se(par fmt(2))) 
>   legend sty(fixed) nomtitles
>   star(* 0.10 ** 0.05 *** 0.01) stat(N r2 DepVarMean, fmt(%12.0gc 2 2) label("N" "\$R^2$"))  
>   postfoot(`"Employer FE? &No&No&No&No&Yes \\"'
>   `"\hline\hline"' `"\bottomrule"' `"\multicolumn{6}{l}{\footnotesize \sym{*} \(p<0.10\),
>   \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') label noabbrev ;
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/table4.tex)

. #d cr
delimiter now cr
. 
. ****************************************************************************************************
. *  3. Appendix Tables and Figures
. ****************************************************************************************************
. 
. * Figure B1: Productivity Distributions by Group
. twoway kdensity score if female_worker==1, bwidth(1) lwidth(thick) || kdensity score ///
>   if female_worker==0, bwidth(1) lpattern(-) lwidth(thick) ///
>   legend(label(1 "Female") label(2 "Male")) ///
>   graphregion(fcolor(white) lcolor(white)) bgcolor(white) ytitle("Kernal Density") ///
>   xtitle("Trivia Score") name(female, replace) nodraw

. 
. twoway kdensity score if india_worker==1, bwidth(1) lwidth(thick) || kdensity score ///
>   if india_worker==0, bwidth(1)  lpattern(-) lwidth(thick) ///
>   legend(label(1 "Indian") label(2 "American")) ///
>   graphregion(fcolor(white) lcolor(white)) bgcolor(white) ytitle("Kernal Density") ///
>   xtitle("Trivia Score") name(india, replace) nodraw

. 
. twoway kdensity score if old_worker==1, bwidth(1) lwidth(thick) || kdensity score ///
>   if old_worker==0, bwidth(1)  lpattern(-) lwidth(thick) ///
>   legend(label(1 "Over 33") label(2 "Under 33")) ///
>   graphregion(fcolor(white) lcolor(white)) bgcolor(white) ytitle("Kernal Density") ///
>   xtitle("Trivia Score") name(old, replace) nodraw

. graph combine female india old, col(3) iscale(*.75) graphregion(fcolor(white) lcolor(white))

. graph export "${figures}figureB1.png", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Results/figures/figureB1.png saved as PNG format

. 
. * Figure B2: Distributions of Employers' Belief Differences by Group
. gen MF_diff = pred_male-pred_female

. gen UI_diff = pred_us-pred_india

. gen OY_diff = pred_old-pred_young

. twoway kdensity MF_diff if tag_employer==1, bwidth(1) lwidth(thick) legend(off) ///
>   graphregion(fcolor(white) lcolor(white)) bgcolor(white) ytitle("Kernal Density") ///
>   xtitle("Beliefs About Men vs. Women") name(female, replace) xline(0) nodraw

. twoway kdensity UI_diff if tag_employer==1, bwidth(1) lwidth(thick) legend(off) ///
>   graphregion(fcolor(white) lcolor(white)) bgcolor(white) ytitle("Kernal Density") ///
>   xtitle("Beliefs About American vs. Indian") name(india, replace) xline(0) nodraw

. twoway kdensity OY_diff if tag_employer==1, bwidth(1) lwidth(thick) legend(off) ///
>   graphregion(fcolor(white) lcolor(white)) bgcolor(white) ytitle("Kernal Density") ///
>   xtitle("Beliefs About Over 33 vs. Under 33") name(old, replace) xline(0) nodraw

. graph combine female india old, col(3) iscale(*.75) graphregion(fcolor(white) lcolor(white))

. graph export "${figures}figureB2.png", replace
file /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication
    Archive/Results/figures/figureB2.png saved as PNG format

. 
. * Table B1: Summary Statistics
. global summary_worker "score duration_worker preftea_worker age_worker female_worker india_worker"

. global summary_employer "duration_employer educ_college_employer age_employer female_employer india_em
> ployer"

. 
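. * NOTE: This first part sometimes gets dropped from the output when running the full do file,
.  *       but works fine when these lines are pasted in manually. Likely cause (to confirm): the
. *       esttab call below omits ".", so it tabulates the estimates stored by eststo (M1-M5 from
. *       Table 4 are still stored until "est clear" runs) instead of the active estpost results.
. *       If the rerun is missing the Worker Male/Female split from tableB1.tex, then paste/run
. *       these lines manually in Stata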
. estpost tabstat $summary_worker if tag_worker==1, by(female_worker) statistics(mean sd N) ///
>   columns(statistics)

Summary statistics: mean sd count
     for variables: score duration_worker preftea_worker age_worker female_worker india_worker
  by categories of: female_worker

female_worke |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
Male         |                                 
       score |  38.32099   8.518531        324 
duration_w~r |   19.0285   10.52399        324 
preftea_wo~r |  .3796296   .4860454        324 
  age_worker |   35.2963   11.26676        324 
female_wor~r |         0          0        324 
india_worker |  .4166667   .4937692        324 
-------------+---------------------------------
Female       |                                 
       score |  35.28302   8.703834        265 
duration_w~r |  18.55748   10.24736        265 
preftea_wo~r |  .4113208   .4930042        265 
  age_worker |  36.61887   11.90723        265 
female_wor~r |         1          0        265 
india_worker |  .2339623   .4241495        265 
-------------+---------------------------------
Total        |                                 
       score |  36.95416   8.727134        589 
duration_w~r |  18.81658   10.39429        589 
preftea_wo~r |  .3938879   .4890258        589 
  age_worker |  35.89134   11.56812        589 
female_wor~r |  .4499151    .497908        589 
india_worker |  .3344652   .4722042        589 

. esttab using "${tables}tableB1.tex", main(mean %8.2f) aux(sd %8.2f) label nostar unstack nomtitle nonu
> mber replace
(tabulating estimates stored by eststo; specify "." to tabulate the active results)
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB1.tex)

. est clear

. 
. estpost tabstat $summary_worker if tag_worker==1, by(india_worker) statistics(mean sd N) ///
>   columns(statistics)

Summary statistics: mean sd count
     for variables: score duration_worker preftea_worker age_worker female_worker india_worker
  by categories of: india_worker

india_worker |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
US           |                                 
       score |  37.14031   8.933297        392 
duration_w~r |  16.19111   8.117989        392 
preftea_wo~r |   .369898   .4833938        392 
  age_worker |  38.54592   12.16249        392 
female_wor~r |  .5178571   .5003196        392 
india_worker |         0          0        392 
-------------+---------------------------------
India        |                                 
       score |  36.58376   8.311496        197 
duration_w~r |  24.04086   12.30602        197 
preftea_wo~r |  .4416244   .4978458        197 
  age_worker |  30.60914   8.014306        197 
female_wor~r |  .3147208   .4655878        197 
india_worker |         1          0        197 
-------------+---------------------------------
Total        |                                 
       score |  36.95416   8.727134        589 
duration_w~r |  18.81658   10.39429        589 
preftea_wo~r |  .3938879   .4890258        589 
  age_worker |  35.89134   11.56812        589 
female_wor~r |  .4499151    .497908        589 
india_worker |  .3344652   .4722042        589 

. esttab using "${tables}tableB1.tex", main(mean %8.2f) aux(sd %8.2f) label nostar unstack nomtitle nonu
> mber append
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB1.tex)

. est clear

. 
. estpost tabstat $summary_worker if tag_worker==1, by(old_worker) statistics(mean sd N) ///
>   columns(statistics)

Summary statistics: mean sd count
     for variables: score duration_worker preftea_worker age_worker female_worker india_worker
  by categories of: old_worker

  old_worker |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
Under 33     |                                 
       score |  37.10191   8.549188        314 
duration_w~r |   20.2518   11.81901        314 
preftea_wo~r |  .4235669   .4949121        314 
  age_worker |  27.37898    3.50349        314 
female_wor~r |  .4267516    .495395        314 
india_worker |  .4681529   .4997812        314 
-------------+---------------------------------
Over 33      |                                 
       score |  36.78545   8.938627        275 
duration_w~r |  17.17782   8.199227        275 
preftea_wo~r |       .36   .4808751        275 
  age_worker |  45.61091    9.76239        275 
female_wor~r |  .4763636   .5003516        275 
india_worker |  .1818182   .3863978        275 
-------------+---------------------------------
Total        |                                 
       score |  36.95416   8.727134        589 
duration_w~r |  18.81658   10.39429        589 
preftea_wo~r |  .3938879   .4890258        589 
  age_worker |  35.89134   11.56812        589 
female_wor~r |  .4499151    .497908        589 
india_worker |  .3344652   .4722042        589 

. esttab using "${tables}tableB1.tex", main(mean %8.2f) aux(sd %8.2f) label nostar unstack nomtitle nonu
> mber append
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB1.tex)

. est clear

. 
. estpost tabstat $summary_employer if tag_employer==1, by(female_employer) statistics(mean sd N) ///
>   columns(statistics)

Summary statistics: mean sd count
     for variables: duration_employer educ_college_employer age_employer female_employer india_employer
  by categories of: female_employer

female_emplo |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
Male         |                                 
duration_e~r |  23.58585   15.57212        344 
educ_colle~r |  .7005814   .4586705        344 
age_employer |  32.65988   9.916739        344 
female_emp~r |         0          0        344 
india_empl~r |  .4127907    .493053        344 
-------------+---------------------------------
Female       |                                 
duration_e~r |  22.36996   19.42854        233 
educ_colle~r |  .6223176   .4858513        233 
age_employer |  36.87983   12.06888        233 
female_emp~r |         1          0        233 
india_empl~r |  .1845494   .3887667        233 
-------------+---------------------------------
Total        |                                 
duration_e~r |  23.09486   17.22766        577 
educ_colle~r |  .6689775   .4709895        577 
age_employer |  34.36395   11.02377        577 
female_emp~r |  .4038128   .4910865        577 
india_empl~r |  .3206239   .4671214        577 

. esttab using "${tables}tableB1.tex", main(mean %8.2f) aux(sd %8.2f) label nostar unstack nomtitle nonu
> mber append
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB1.tex)

. est clear

. 
. estpost tabstat $summary_employer if tag_employer==1, by(india_employer) statistics(mean sd N) ///
>   columns(statistics)

Summary statistics: mean sd count
     for variables: duration_employer educ_college_employer age_employer female_employer india_employer
  by categories of: india_employer

india_employ |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
US           |                                 
duration_e~r |  19.08027   11.70495        392 
educ_colle~r |  .5586735   .4971801        392 
age_employer |  35.73214   11.63499        392 
female_emp~r |  .4846939   .5004043        392 
india_empl~r |         0          0        392 
-------------+---------------------------------
India        |                                 
duration_e~r |  31.60144    23.0404        185 
educ_colle~r |  .9027027   .2971664        185 
age_employer |  31.46486   8.961882        185 
female_emp~r |  .2324324   .4235295        185 
india_empl~r |         1          0        185 
-------------+---------------------------------
Total        |                                 
duration_e~r |  23.09486   17.22766        577 
educ_colle~r |  .6689775   .4709895        577 
age_employer |  34.36395   11.02377        577 
female_emp~r |  .4038128   .4910865        577 
india_empl~r |  .3206239   .4671214        577 

. esttab using "${tables}tableB1.tex", main(mean %8.2f) aux(sd %8.2f) label nostar unstack nomtitle nonu
> mber append
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB1.tex)

. est clear

. 
. estpost tabstat $summary_employer if tag_employer==1, by(old_employer) statistics(mean sd N) ///
>   columns(statistics)

Summary statistics: mean sd count
     for variables: duration_employer educ_college_employer age_employer female_employer india_employer
  by categories of: old_employer

old_employer |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
Under 33     |                                 
duration_e~r |  22.53199   18.99991        334 
educ_colle~r |  .6706587   .4706792        334 
age_employer |  27.08982   3.587309        334 
female_emp~r |  .3413174   .4748631        334 
india_empl~r |  .3952096   .4896292        334 
-------------+---------------------------------
Over 33      |                                 
duration_e~r |  23.86852   14.44413        243 
educ_colle~r |  .6666667   .4723775        243 
age_employer |  44.36214   9.906199        243 
female_emp~r |  .4897119   .5009259        243 
india_empl~r |   .218107   .4138128        243 
-------------+---------------------------------
Total        |                                 
duration_e~r |  23.09486   17.22766        577 
educ_colle~r |  .6689775   .4709895        577 
age_employer |  34.36395   11.02377        577 
female_emp~r |  .4038128   .4910865        577 
india_empl~r |  .3206239   .4671214        577 

. esttab using "${tables}tableB1.tex", main(mean %8.2f) aux(sd %8.2f) label nostar unstack nomtitle nonu
> mber append
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB1.tex)

. est clear

. 
. * Table B2: Discrimination in Wages, by Employee Characteristics (Hiring Task 1)
. eststo M1: reghdfe wage female_worker if hiringtask==1, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =       7.83
Statistics robust to heteroskedasticity           Prob > F        =     0.0053
                                                  R-squared       =     0.0018
                                                  Adj R-squared   =     0.0018
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0018
Number of clusters (id_worker) =        589       Root MSE        =    12.1462

                 (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------
              |               Robust
         wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------+----------------------------------------------------------------
female_worker |  -1.049539   .3750316    -2.80   0.005    -1.786135   -.3129425
        _cons |   31.89758   .4141395    77.02   0.000     31.08417    32.71099
-------------------------------------------------------------------------------

.  sum wage if e(sample) & female_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,306    31.89758    12.07482          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.897578

. eststo M2: reghdfe wage india_worker if hiringtask==1, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =      26.82
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0069
                                                  Adj R-squared   =     0.0068
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0069
Number of clusters (id_worker) =        589       Root MSE        =    12.1157

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
        wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
india_worker |   2.137125   .4126746     5.18   0.000     1.326594    2.947655
       _cons |   30.71042   .4072324    75.41   0.000     29.91058    31.51026
------------------------------------------------------------------------------

.  sum wage if e(sample) & india_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      7,700    30.71042    12.19754          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.710417

. eststo M3: reghdfe wage old_worker if hiringtask==1, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   1,    576) =       1.88
Statistics robust to heteroskedasticity           Prob > F        =     0.1709
                                                  R-squared       =     0.0005
                                                  Adj R-squared   =     0.0004
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0005
Number of clusters (id_worker) =        589       Root MSE        =    12.1545

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
        wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  old_worker |  -.5364262   .3912425    -1.37   0.171    -1.304862    .2320096
       _cons |   31.67262   .4095437    77.34   0.000     30.86824      32.477
------------------------------------------------------------------------------

.  sum wage if e(sample) & old_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,139    31.67262    12.00198          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.672618

. eststo M4: reghdfe wage female_worker india_worker old_worker if hiringtask==1, ///
>   cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   3,    576) =      10.09
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0076
                                                  Adj R-squared   =     0.0073
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0076
Number of clusters (id_worker) =        589       Root MSE        =    12.1125

                 (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------
              |               Robust
         wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------+----------------------------------------------------------------
female_worker |  -.6559822   .3731847    -1.76   0.079    -1.388951    .0769865
 india_worker |   2.011497   .4325051     4.65   0.000     1.162017    2.860976
   old_worker |   .0593558     .39094     0.15   0.879    -.7084858    .8271975
        _cons |   31.02196    .497287    62.38   0.000     30.04525    31.99868
-------------------------------------------------------------------------------

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. eststo M5: reghdfe wage female_worker india_worker old_worker if hiringtask==1, ///
>   abs(id_employer) cluster(id_employer id_worker)
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   3,    576) =      12.62
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.4862
                                                  Adj R-squared   =     0.4591
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0142
Number of clusters (id_worker) =        589       Root MSE        =     8.9410

                 (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------
              |               Robust
         wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------+----------------------------------------------------------------
female_worker |  -.7831612   .3278989    -2.39   0.017    -1.427185   -.1391379
 india_worker |   2.030499   .3826393     5.31   0.000      1.27896    2.782037
   old_worker |   .3068368   .3494251     0.88   0.380     -.379466    .9931396
        _cons |    30.9575   .3170604    97.64   0.000     30.33476    31.58023
-------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------+
 Absorbed FE | Categories  - Redundant  = Num. Coefs |
-------------+---------------------------------------|
 id_employer |       577         577           0    *|
-----------------------------------------------------+
* = FE nested within cluster; treated as redundant for DoF computation

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. eststo M6: reghdfe wage female_worker india_worker old_worker preftea_worker if hiringtask==1, ///
>   abs(id_employer) cluster(id_employer id_worker)
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   4,    576) =       9.80
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.4864
                                                  Adj R-squared   =     0.4593
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0146
Number of clusters (id_worker) =        589       Root MSE        =     8.9396

                  (Std. err. adjusted for 577 clusters in id_employer id_worker)
--------------------------------------------------------------------------------
               |               Robust
          wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
---------------+----------------------------------------------------------------
 female_worker |  -.7976772   .3280725    -2.43   0.015    -1.442042   -.1533129
  india_worker |   2.003923   .3832138     5.23   0.000     1.251256     2.75659
    old_worker |   .3206476   .3489338     0.92   0.359      -.36469    1.005985
preftea_worker |   .3723924   .3171639     1.17   0.241    -.2505464    .9953312
         _cons |   30.82237   .3324062    92.73   0.000      30.1695    31.47525
--------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------+
 Absorbed FE | Categories  - Redundant  = Num. Coefs |
-------------+---------------------------------------|
 id_employer |       577         577           0    *|
-----------------------------------------------------+
* = FE nested within cluster; treated as redundant for DoF computation

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. eststo M7: reghdfe wage female_worker india_worker old_worker math_fav if hiringtask==1, ///
>   abs(id_employer) cluster(id_employer id_worker)
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   4,    576) =      56.09
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.5193
                                                  Adj R-squared   =     0.4939
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0777
Number of clusters (id_worker) =        589       Root MSE        =     8.6487

                 (Std. err. adjusted for 577 clusters in id_employer id_worker)
-------------------------------------------------------------------------------
              |               Robust
         wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
--------------+----------------------------------------------------------------
female_worker |  -.6159186   .2668122    -2.31   0.021    -1.139962   -.0918752
 india_worker |   1.091934   .3189144     3.42   0.001     .4655567    1.718311
   old_worker |   .3461364   .2880231     1.20   0.230    -.2195672      .91184
     math_fav |   5.313423    .371775    14.29   0.000     4.583223    6.043623
        _cons |   29.81728   .2723413   109.48   0.000     29.28237    30.35218
-------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------+
 Absorbed FE | Categories  - Redundant  = Num. Coefs |
-------------+---------------------------------------|
 id_employer |       577         577           0    *|
-----------------------------------------------------+
* = FE nested within cluster; treated as redundant for DoF computation

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. eststo M8: reghdfe wage female_worker india_worker old_worker preftea_worker math_fav ///
>   blue_fav football_fav popmovie_fav if hiringtask==1, ///
>   abs(id_employer) cluster(id_employer id_worker)
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     11,540
Absorbing 1 HDFE group                            F(   8,    576) =      30.00
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.5213
                                                  Adj R-squared   =     0.4958
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0815
Number of clusters (id_worker) =        589       Root MSE        =     8.6323

                  (Std. err. adjusted for 577 clusters in id_employer id_worker)
--------------------------------------------------------------------------------
               |               Robust
          wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
---------------+----------------------------------------------------------------
 female_worker |  -.4827774    .268038    -1.80   0.072    -1.009228    .0436736
  india_worker |   1.247198   .3151799     3.96   0.000     .6281564     1.86624
    old_worker |   .3456156   .2841976     1.22   0.224    -.2125744    .9038057
preftea_worker |   .3680327   .2594613     1.42   0.157    -.1415729    .8776383
      math_fav |   5.244152   .3680285    14.25   0.000     4.521311    5.966994
      blue_fav |    .179459   .2756425     0.65   0.515    -.3619279    .7208459
  football_fav |    .760699   .2985757     2.55   0.011     .1742691    1.347129
  popmovie_fav |   1.045297   .3132447     3.34   0.001     .4300559    1.660538
         _cons |   29.12469   .3601766    80.86   0.000     28.41727    29.83211
--------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------+
 Absorbed FE | Categories  - Redundant  = Num. Coefs |
-------------+---------------------------------------|
 id_employer |       577         577           0    *|
-----------------------------------------------------+
* = FE nested within cluster; treated as redundant for DoF computation

.  sum wage if e(sample) & female_worker==0 & india_worker==0 & old_worker==0 ///
>    & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      1,579    30.71196    12.04171          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.711957

. 
. 
. #d ;
delimiter now ;
. esttab M1 M2 M3 M4 M5 M6 M7 M8 using "${tables}tableB2.tex", replace 
>   keep(female_worker india_worker old_worker preftea_worker math_fav blue_fav 
>   football_fav popmovie_fav) coeflabel(female_worker "Female" india_worker "Indian" 
>   old_worker "Over 33" preftea_worker "Prefers Tea" math_fav "Fav Subject: Math" 
>   blue_fav "Fav Color: Blue" football_fav "Fav Sport: Football" popmovie_fav "Fav Movie: Popular")
>   cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) nomtitles
>   star(* 0.10 ** 0.05 *** 0.01) stat(N r2 DepVarMean, fmt(%12.0gc 2 2) label("N" "\$R^2$"))  
>   postfoot(`"Employer FE? &No&No&No&No&Yes&Yes&Yes&Yes \\"'
>   `"\hline\hline"' `"\bottomrule"' `"\multicolumn{9}{l}{\footnotesize \sym{*} \(p<0.10\),
>   \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') label noabbrev ;
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB2.tex)

. #d cr
delimiter now cr
. 
. 
.   
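. * Table B3: In-Group Bias Test (Hiring Task 1)
. * NOTE(review): the regressions below carry no "if hiringtask==1" restriction (N = 17,310 versus
. *       11,540 in Table B2), while each DepVarMean is computed with hiringtask==1 -- confirm
. *       which sample is intended.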
. for Y in any female india old: gen Y_match=Y_worker*Y_employer

->  gen female_match=female_worker*female_employer

->  gen india_match=india_worker*india_employer

->  gen old_match=old_worker*old_employer

. 
. eststo M1: reghdfe wage female_worker female_employer female_match, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   3,    576) =       8.21
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0091
                                                  Adj R-squared   =     0.0089
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0091
Number of clusters (id_worker) =        589       Root MSE        =    11.7925

                   (Std. err. adjusted for 577 clusters in id_employer id_worker)
---------------------------------------------------------------------------------
                |               Robust
           wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
----------------+----------------------------------------------------------------
  female_worker |  -1.415598   .3734137    -3.79   0.000    -2.149017   -.6821799
female_employer |   1.778906   .6927963     2.57   0.010     .4181908    3.139621
   female_match |   .2628729   .4382241     0.60   0.549    -.5978391    1.123585
          _cons |   31.70907   .4877384    65.01   0.000     30.75111    32.66703
---------------------------------------------------------------------------------

.  sum wage if e(sample) & female_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,306    31.89758    12.07482          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.897578

. eststo M2: reghdfe wage india_worker india_employer india_match, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   3,    576) =       9.05
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0061
                                                  Adj R-squared   =     0.0059
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0061
Number of clusters (id_worker) =        589       Root MSE        =    11.8102

                  (Std. err. adjusted for 577 clusters in id_employer id_worker)
--------------------------------------------------------------------------------
               |               Robust
          wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
---------------+----------------------------------------------------------------
  india_worker |    2.04352   .4372425     4.67   0.000     1.184736    2.902304
india_employer |   .9857567   .7120902     1.38   0.167    -.4128533    2.384367
   india_match |  -.7893171    .505318    -1.56   0.119    -1.781808    .2031735
         _cons |   30.92143   .4958559    62.36   0.000     29.94753    31.89534
--------------------------------------------------------------------------------

.  sum wage if e(sample) & india_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      7,700    30.71042    12.19754          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  30.710417

. eststo M3: reghdfe wage old_worker old_employer old_match, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   3,    576) =       3.47
Statistics robust to heteroskedasticity           Prob > F        =     0.0161
                                                  R-squared       =     0.0020
                                                  Adj R-squared   =     0.0018
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0020
Number of clusters (id_worker) =        589       Root MSE        =    11.8347

                (Std. err. adjusted for 577 clusters in id_employer id_worker)
------------------------------------------------------------------------------
             |               Robust
        wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  old_worker |   -.861097   .3671171    -2.35   0.019    -1.582148   -.1400456
old_employer |   .3092722   .6925343     0.45   0.655    -1.050928    1.669473
   old_match |   1.101005   .4045403     2.72   0.007     .3064509    1.895559
       _cons |   31.89652   .4916385    64.88   0.000      30.9309    32.86215
------------------------------------------------------------------------------

.  sum wage if e(sample) & old_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,139    31.67262    12.00198          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.672618

. eststo M4: reghdfe wage female_worker female_employer female_match india_worker india_employer ///
>  india_match old_worker old_employer old_match, cluster(id_employer id_worker) noa
(MWFE estimator converged in 1 iterations)

HDFE Linear regression                            Number of obs   =     17,310
Absorbing 1 HDFE group                            F(   9,    576) =       6.40
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.0171
                                                  Adj R-squared   =     0.0166
Number of clusters (id_employer) =        577     Within R-sq.    =     0.0171
Number of clusters (id_worker) =        589       Root MSE        =    11.7467

                   (Std. err. adjusted for 577 clusters in id_employer id_worker)
---------------------------------------------------------------------------------
                |               Robust
           wage | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
----------------+----------------------------------------------------------------
  female_worker |  -1.195446    .374103    -3.20   0.001    -1.930219   -.4606741
female_employer |   1.909827   .7234623     2.64   0.009     .4888813    3.330773
   female_match |   .4067394   .4366204     0.93   0.352    -.4508229    1.264302
   india_worker |   1.876767   .4512668     4.16   0.000     .9904383    2.763096
 india_employer |   1.695717   .7479722     2.27   0.024     .2266318    3.164803
    india_match |  -.8245427   .5101889    -1.62   0.107      -1.8266    .1775148
     old_worker |  -.3890234   .3667168    -1.06   0.289    -1.109289    .3312417
   old_employer |     .22156   .7115227     0.31   0.756    -1.175935    1.619055
      old_match |   1.192828   .4010723     2.97   0.003     .4050859    1.980571
          _cons |   30.29853   .7473622    40.54   0.000     28.83064    31.76642
---------------------------------------------------------------------------------

.  sum wage if e(sample) & old_worker==0 & hiringtask==1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        wage |      6,139    31.67262    12.00198          0         50

.  estadd scalar DepVarMean=r(mean)

added scalar:
         e(DepVarMean) =  31.672618

. 
. #d ;
delimiter now ;
. esttab M1 M2 M3 M4 using "${tables}tableB3.tex", replace 
>   keep(female_worker female_employer female_match india_worker india_employer india_match 
>   old_worker old_employer old_match)
>   order(female_worker female_employer female_match india_worker india_employer india_match 
>   old_worker old_employer old_match)
>   coeflabel(female_worker "Female Worker" female_employer "Female Employer" female_match 
>   "Female Worker X Employer" india_worker "Indian Worker" india_employer "Indian Employer" 
>   india_match "Indian Worker X Employer" old_worker "Over 33 Worker" old_employer "Over 33 Employer"
>   old_match "Over 33 Worker X Employer") cells(b(star fmt(2)) se(par fmt(2))) legend sty(fixed) 
>   nomtitles star(* 0.10 ** 0.05 *** 0.01) stat(N r2 DepVarMean, fmt(%12.0gc 2 2) 
>   label("N" "\$R^2$"))  postfoot(`"\hline\hline"' `"\bottomrule"' 
>   `"\multicolumn{5}{l}{\footnotesize \sym{*} \(p<0.10\),
>   \sym{**} \(p<0.05\), \sym{***} \(p<0.01\)} \\"' `"\end{tabular}"' `"}"') label noabbrev ;
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB3.tex)

. #d cr
delimiter now cr
. 
. * Table B4: In-Group vs. Out-Group Beliefs about Productivity by Employee Characteristics
. gen male_employer=abs(female_employer-1)

. gen us_employer=abs(india_employer-1)

. gen young_employer=abs(old_employer-1)

. 
. foreach X in female male india us old young {
  2.     qui ttest pred_`X' if tag_employer==1, by(`X'_employer)
  3.     local mean0_`X'=r(mu_1)
  4.     local mean1_`X'=r(mu_2)
  5.     local sd0_`X'=r(sd_1)
  6.     local sd1_`X'=r(sd_2)
  7.     local N0_`X'=r(N_1)
  8.     local N1_`X'=r(N_2)
  9.     local diff_`X'=r(mu_1)-r(mu_2)
 10.     local p_`X'=r(p)
 11.     
.     local mean0_`X': di %6.2f `mean0_`X'' 
 12.     local sd0_`X': di %6.2f `sd0_`X'' 
 13.     local N0_`X': di %12.0gc `N0_`X'' 
 14.     local mean1_`X': di %6.2f `mean1_`X'' 
 15.     local sd1_`X': di %6.2f `sd1_`X'' 
 16.     local N1_`X': di %12.0gc `N1_`X'' 
 17.     local p_`X': di %6.2f `p_`X'' 
 18.     local diff_`X': di %6.2f `diff_`X''   
 19. }

. global label_female "Prediction for Female Workers"

. global label_male "Prediction for Male Workers"

. global label_india "Prediction for Indian Workers"

. global label_us "Prediction for US Workers"

. global label_old "Prediction for Over 33 Workers"

. global label_young "Prediction for Under 33 Workers"

. 
. texdoc init "${tables}tableB4.tex", replace
(texdoc output file is /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Arc
> hive/Results/tables/tableB4.tex)

. tex {\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}

. tex \begin{tabular}{l*{6}{c}}

. tex \hline\hline

. tex &\textbf{Out}&\textbf{In}&\textbf{Diff.}&\textbf{p-val}&\textbf{\#Obs.}&\textbf{\#Obs.}\\

. tex &\textbf{Group}&\textbf{Group}&&&\textbf{Out}&\textbf{In}  \\

. tex &\multicolumn{1}{c}{(1)}&\multicolumn{1}{c}{(2)}&\multicolumn{1}{c}{(3)}& ///
>   \multicolumn{1}{c}{(4)}&\multicolumn{1}{c}{(5)}&\multicolumn{1}{c}{(6)}\\

. tex \hline

. foreach X in female male india us old young {
  2.     tex ${label_`X'} & `mean0_`X'' & `mean1_`X'' & `diff_`X'' & `p_`X'' ///
>       & `N0_`X'' & `N1_`X'' \\
  3.     tex & (`sd0_`X'') & (`sd1_`X'') &            &         &          &   \\
  4. }   

. tex \hline\hline

. tex \end{tabular}}    

. texdoc close
(texdoc output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication 
> Archive/Results/tables/tableB4.tex)

. drop male_employer us_employer young_employer

. 
. * Predictions about group difference/gaps, by own group identity (as discussed in the text)
. gen pred_gendergap = pred_male - pred_female

. gen pred_nationgap = pred_us - pred_india 

. gen pred_agegap = pred_young - pred_old

. ttest pred_gendergap if tag_employer==1, by(female_employer)

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
    Male |     344    1.892878    .2869145    5.321472    1.328544    2.457211
  Female |     233    1.890558     .405083    6.183323    1.092446    2.688669
---------+--------------------------------------------------------------------
Combined |     577    1.891941    .2364623    5.680019    1.427508    2.356375
---------+--------------------------------------------------------------------
    diff |              .00232     .482345                -.945053     .949693
------------------------------------------------------------------------------
    diff = mean(Male) - mean(Female)                              t =   0.0048
H0: diff = 0                                     Degrees of freedom =      575

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.5019         Pr(|T| > |t|) = 0.9962          Pr(T > t) = 0.4981

. ttest pred_nationgap if tag_employer==1, by(india_employer)

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
      US |     392   -3.254082    .3787715    7.499292   -3.998765   -2.509398
   India |     185   -1.601081    .6220342     8.46058   -2.828318   -.3738446
---------+--------------------------------------------------------------------
Combined |     577    -2.72409    .3268432    7.851044    -3.36604    -2.08214
---------+--------------------------------------------------------------------
    diff |           -1.653001    .6975143               -3.022987   -.2830141
------------------------------------------------------------------------------
    diff = mean(US) - mean(India)                                 t =  -2.3698
H0: diff = 0                                     Degrees of freedom =      575

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.0091         Pr(|T| > |t|) = 0.0181          Pr(T > t) = 0.9909

. ttest pred_agegap if tag_employer==1, by(old_employer)

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
Under 33 |     334    2.166198    .3760654    6.872846    1.426434    2.905961
 Over 33 |     243    1.386626    .4614382    7.193109    .4776776    2.295573
---------+--------------------------------------------------------------------
Combined |     577    1.837886    .2919933    7.013919    1.264384    2.411387
---------+--------------------------------------------------------------------
    diff |            .7795721    .5910082               -.3812262     1.94037
------------------------------------------------------------------------------
    diff = mean(Under 33) - mean(Over 33)                         t =   1.3191
H0: diff = 0                                     Degrees of freedom =      575

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.9062         Pr(|T| > |t|) = 0.1877          Pr(T > t) = 0.0938

. drop pred_gendergap pred_nationgap pred_agegap

. 
. * Table B5: Effects of Large Incentives for Accurate Predictions
. *   NOTE: Requires combining the two files into one for tableB5.tex
. global predictions "pred_female pred_male pred_india pred_us pred_old pred_young"

. eststo sumstat: estpost tabstat $predictions if tag_employer==1, by(incentivized) ///
>   col(stats) stats(mean sd N)

Summary statistics: mean sd count
     for variables: pred_female pred_male pred_india pred_us pred_old pred_young
  by categories of: incentivized

incentivized |   e(mean)      e(sd)   e(count) 
-------------+---------------------------------
0            |                                 
 pred_female |   32.3619   7.711706        290 
   pred_male |  34.21586    7.37007        290 
  pred_india |  35.28828   8.491098        290 
     pred_us |  32.28121   8.211771        290 
    pred_old |  31.94531    8.38897        290 
  pred_young |  33.72931   8.575489        290 
-------------+---------------------------------
1            |                                 
 pred_female |  31.92526   9.075903        287 
   pred_male |  33.85557   9.078266        287 
  pred_india |  34.30662   10.29656        287 
     pred_us |  31.86847   8.902031        287 
    pred_old |  31.19338    9.57589        287 
  pred_young |  33.08571   9.349318        287 
-------------+---------------------------------
Total        |                                 
 pred_female |  32.14471   8.413453        577 
   pred_male |  34.03666   8.258757        577 
  pred_india |      34.8   9.437018        577 
     pred_us |  32.07591   8.557117        577 
    pred_old |   31.5713   8.998979        577 
  pred_young |  33.40919   8.966732        577 

. esttab sumstat using "${tables}tableB5_sumstats.tex", replace main(mean) aux(sd) nostar ///
>   unstack nonote label cells(mean(fmt(%6.2f)) sd(fmt(%6.2f) par))
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB5_sumstats.tex)

. eststo ttest: estpost ttest $predictions if tag_employer==1, by(incentivized)

             |      e(b)   e(count)      e(se)       e(t)    e(df_t)     e(p_l)       e(p)     e(p_u) 
-------------+----------------------------------------------------------------------------------------
 pred_female |  .4366353        577   .7008951   .6229681        575   .7332237   .5335525   .2667763 
   pred_male |  .3602872        577   .6880761   .5236153        575   .6996261   .6007479   .3003739 
  pred_india |  .9816557        577   .7853637   1.249938        575   .8940846   .2118308   .1059154 
     pred_us |    .41274        577    .712896   .5789624        575   .7185794   .5628412   .2814206 
    pred_old |  .7519306        577   .7492703   1.003551        575   .8419913   .3160174   .1580087 
  pred_young |  .6435961        577   .7467567   .8618551        575   .8054367   .3891265   .1945633 

             |    e(N_1)    e(mu_1)     e(N_2)    e(mu_2) 
-------------+--------------------------------------------
 pred_female |       290    32.3619        287   31.92526 
   pred_male |       290   34.21586        287   33.85557 
  pred_india |       290   35.28828        287   34.30662 
     pred_us |       290   32.28121        287   31.86847 
    pred_old |       290   31.94531        287   31.19338 
  pred_young |       290   33.72931        287   33.08571 

. esttab ttest using "${tables}tableB5_diff.tex", replace wide cells((b(fmt(%6.2f) star) ///
>   p(fmt(2)))) nonote label refcat("\textbf{Predictions by Worker Characteristics}", nolabel) ///
>   varlabels(pred_female "1" pred_male "2" pred_india "3"  pred_us "4" pred_old ///
>   "5" pred_young "6") star(* 0.10 ** 0.05 *** 0.01)
(output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive
> /Results/tables/tableB5_diff.tex)

. 
. * Table B6: Beliefs about Productivity by Employee Characteristics, Trimmed
. for X in any MF UI OY: winsor2 X_diff if tag_employer==1, cuts(5 95) trim

->  winsor2 MF_diff if tag_employer==1, cuts(5 95) trim

->  winsor2 UI_diff if tag_employer==1, cuts(5 95) trim

->  winsor2 OY_diff if tag_employer==1, cuts(5 95) trim

. ttest pred_male=pred_female if tag_employer==1 & MF_diff_tr != .

Paired t test
------------------------------------------------------------------------------
Variable |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
pred_m~e |     528    34.25625    .3581619    8.229934    33.55265    34.95985
pred_f~e |     528    32.30028    .3568181    8.199055    31.59932    33.00124
---------+--------------------------------------------------------------------
    diff |     528    1.955966    .1832968    4.211839    1.595884    2.316048
------------------------------------------------------------------------------
     mean(diff) = mean(pred_male - pred_female)                   t =  10.6710
 H0: mean(diff) = 0                              Degrees of freedom =      527

 Ha: mean(diff) < 0           Ha: mean(diff) != 0           Ha: mean(diff) > 0
 Pr(T < t) = 1.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 0.0000

. local pred_mean0_female=r(mu_1)

. local pred_mean1_female=r(mu_2)

. local pred_sd0_female=r(sd_1)

. local pred_sd1_female=r(sd_2)

. local pred_diff_female=r(mu_1)-r(mu_2)

. local pred_p_female=r(p)

. 
. ttest pred_us=pred_india if tag_employer==1 & UI_diff_tr != .

Paired t test
------------------------------------------------------------------------------
Variable |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
 pred_us |     541    31.99593    .3651225    8.492533     31.2787    32.71317
pred_i~a |     541    35.21368    .3806272    8.853163    34.46599    35.96137
---------+--------------------------------------------------------------------
    diff |     541   -3.217745    .2378079     5.53127   -3.684887   -2.750603
------------------------------------------------------------------------------
     mean(diff) = mean(pred_us - pred_india)                      t = -13.5309
 H0: mean(diff) = 0                              Degrees of freedom =      540

 Ha: mean(diff) < 0           Ha: mean(diff) != 0           Ha: mean(diff) > 0
 Pr(T < t) = 0.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 1.0000

. local pred_mean0_india=r(mu_1)

. local pred_mean1_india=r(mu_2)

. local pred_sd0_india=r(sd_1)

. local pred_sd1_india=r(sd_2)

. local pred_diff_india=r(mu_1)-r(mu_2)

. local pred_p_india=r(p)

. 
. ttest pred_young=pred_old if tag_employer==1 & OY_diff_tr != .

Paired t test
------------------------------------------------------------------------------
Variable |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
pred_y~g |     528    33.42254    .3848345    8.842824    32.66654    34.17854
pred_old |     528     31.7836    .3844424    8.833815    31.02837    32.53883
---------+--------------------------------------------------------------------
    diff |     528    1.638939    .2214433     5.08838    1.203919    2.073959
------------------------------------------------------------------------------
     mean(diff) = mean(pred_young - pred_old)                     t =   7.4012
 H0: mean(diff) = 0                              Degrees of freedom =      527

 Ha: mean(diff) < 0           Ha: mean(diff) != 0           Ha: mean(diff) > 0
 Pr(T < t) = 1.0000         Pr(|T| > |t|) = 0.0000          Pr(T > t) = 0.0000

. local pred_mean0_old=r(mu_1)

. local pred_mean1_old=r(mu_2)

. local pred_sd0_old=r(sd_1)

. local pred_sd1_old=r(sd_2)

. local pred_diff_old=r(mu_1)-r(mu_2)

. local pred_p_old=r(p)

. 
. foreach X in female india old {
  2.     foreach Y in mean0 mean1 sd0 sd1 diff p{
  3.         local pred_`Y'_`X': di %6.2f `pred_`Y'_`X'' 
  4.     } 
  5. }

. 
. global label_female "Gender (1 = Male, 2 = Female)"

. global label_india "Country (1 = US, 2 = India)"

. global label_old "Age (1 = Under 33, 2 = Over 33)"

. 
. texdoc init "${tables}tableB6.tex", replace  
(texdoc output file is /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Arc
> hive/Results/tables/tableB6.tex)

. tex {\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}

. tex \begin{tabular}{l*{4}{c}}

. tex \hline\hline

. tex &\textbf{Group 1}&\textbf{Group 2}&\textbf{Diff.} &\textbf{p-val}\\

. tex & (1) & (2) & (3) & (4)\\

. tex \hline

. foreach X in female india old {
  2.     tex ${label_`X'} & `pred_mean0_`X'' & `pred_mean1_`X'' & `pred_diff_`X'' & `pred_p_`X'' \\
  3.     tex              & (`pred_sd0_`X'') & (`pred_sd1_`X'') &            &            \\
  4. }   

. tex \hline\hline

. tex \end{tabular}}    

. texdoc close
(texdoc output written to /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication 
> Archive/Results/tables/tableB6.tex)

. 
. ****************************************************************************************************
. *  4. Additional Stats
. ****************************************************************************************************
. 
. * Correlate beliefs & wages. In the text this is just before Table 3: 
. *    "We find positive correlations for all six groups of workers (Female: 0.12, Male: 0.12,
. *     India: 0.15, U.S.: 0.12, Over 33: 0.12, Under 33: 0.10)"
. corr wage pred_female if female_worker==1
(obs=7,778)

             |     wage pred_f~e
-------------+------------------
        wage |   1.0000
 pred_female |   0.1176   1.0000


. corr wage pred_male if female_worker==0
(obs=9,532)

             |     wage pred_m~e
-------------+------------------
        wage |   1.0000
   pred_male |   0.1208   1.0000


. corr wage pred_india if india_worker==1
(obs=5,825)

             |     wage pred_i~a
-------------+------------------
        wage |   1.0000
  pred_india |   0.1467   1.0000


. corr wage pred_us if india_worker==0
(obs=11,485)

             |     wage  pred_us
-------------+------------------
        wage |   1.0000
     pred_us |   0.1239   1.0000


. corr wage pred_old if old_worker==1
(obs=8,102)

             |     wage pred_old
-------------+------------------
        wage |   1.0000
    pred_old |   0.1227   1.0000


. corr wage pred_young if old_worker==0
(obs=9,208)

             |     wage pred_y~g
-------------+------------------
        wage |   1.0000
  pred_young |   0.1016   1.0000


. 
. * Balance Test for Incentives Treatment. In the text this is in the table notes for Table B4
. * (NOTE(review): the incentives table is produced above as Table B5 -- confirm the table number):
. *    "The joint f-statistic from regression of an indicator for the ``Incentivized'' treatment on 
. *     set of employer observable characteristics in \cref{Table_SummaryStatistics}, Panel B 
. *     (duration, education, age, female, from India) is 1.31 (p=0.260)."
. reg incentivized duration_employer educ_college_employer age_employer female_employer ///
>   india_employer if tag_employer==1

      Source |       SS           df       MS      Number of obs   =       577
-------------+----------------------------------   F(5, 571)       =      1.31
       Model |  1.63135504         5  .326271007   Prob > F        =    0.2596
    Residual |  142.614745       571  .249763127   R-squared       =    0.0113
-------------+----------------------------------   Adj R-squared   =    0.0027
       Total |  144.246101       576  .250427258   Root MSE        =    .49976

---------------------------------------------------------------------------------------
         incentivized | Coefficient  Std. err.      t    P>|t|     [95% conf. interval]
----------------------+----------------------------------------------------------------
    duration_employer |  -.0002556   .0012935    -0.20   0.843    -.0027961    .0022849
educ_college_employer |   .0719497   .0470975     1.53   0.127    -.0205558    .1644552
         age_employer |  -.0037457   .0019554    -1.92   0.056    -.0075864    .0000949
      female_employer |   .0294441   .0442177     0.67   0.506    -.0574051    .1162934
       india_employer |   .0015649   .0523325     0.03   0.976    -.1012228    .1043527
                _cons |   .5714966   .0793461     7.20   0.000     .4156507    .7273424
---------------------------------------------------------------------------------------

. testparm*

 ( 1)  duration_employer = 0
 ( 2)  educ_college_employer = 0
 ( 3)  age_employer = 0
 ( 4)  female_employer = 0
 ( 5)  india_employer = 0

       F(  5,   571) =    1.31
            Prob > F =    0.2596

. disp r(p)
.25957209

. 
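. * Test that the first 10 profiles seen in Hiring Task 1 do not differ from the second 10 profiles
. * seen in Hiring Task 1. In the text this is in footnote 35:
. *    "To investigate this channel, we perform a test comparing the average wages assigned in the 
. *     first 10 profiles and the second 10 profiles during the initial task. We do not find evidence 
. *     for an experience effect (36.86 vs. 36.72; p=.39)"
. * NOTE(review): the ttest below is run on the variable score, not wage, and first10==1 (order<=10)
. *     is the group with mean 36.72 -- confirm this matches the comparison described in the footnote.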
. gen first10=order<=10

. ttest score if hiringtask_number==1, by(first10)

Two-sample t test with equal variances
------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. err.   Std. dev.   [95% conf. interval]
---------+--------------------------------------------------------------------
       0 |   5,770    36.86066     .114925    8.729759    36.63536    37.08595
       1 |   5,770     36.7201     .116551    8.853275    36.49162    36.94859
---------+--------------------------------------------------------------------
Combined |  11,540    36.79038    .0818402    8.791634    36.62996     36.9508
---------+--------------------------------------------------------------------
    diff |            .1405546    .1636823               -.1802904    .4613996
------------------------------------------------------------------------------
    diff = mean(0) - mean(1)                                      t =   0.8587
H0: diff = 0                                     Degrees of freedom =    11538

    Ha: diff < 0                 Ha: diff != 0                 Ha: diff > 0
 Pr(T < t) = 0.8047         Pr(|T| > |t|) = 0.3905          Pr(T > t) = 0.1953

. 
. log close
      name:  <unnamed>
       log:  /Users/kareemhaggag/Dropbox/Inaccurate Statistical Discrimination/Replication Archive/Resul
> ts/log/inaccurate_discrimination.log
  log type:  text
 closed on:  12 Jul 2023, 15:46:46
--------------------------------------------------------------------------------------------------------
